%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/


%% Created for Simon Price at 2008-06-19 15:03:49 -0600 


%% Saved with string encoding Unicode (UTF-8) 



@inproceedings{culotta2005,
	Abstract = {Record deduplication is the task of merging database records that refer to the same underlying entity. In relational databases, accurate deduplication for records of one type is often dependent on the decisions made for records of other types. Whereas nearly all previous approaches have merged records of different types independently, this work models these inter-dependencies explicitly to collectively deduplicate records of multiple types. We construct a conditional random field model of deduplication that captures these relational dependencies, and then employ a novel relational partitioning algorithm to jointly deduplicate records. For two citation matching datasets, we show that collectively deduplicating paper and venue records results in up to a 30% error reduction in venue deduplication, and up to a 20% error reduction in paper deduplication.},
	Address = {New York, NY, USA},
	Author = {Aron Culotta and Andrew McCallum},
	Booktitle = {CIKM '05: Proceedings of the 14th ACM international conference on Information and knowledge management},
	Date-Added = {2008-06-19 15:01:33 -0600},
	Date-Modified = {2008-06-19 15:03:45 -0600},
	Doi = {10.1145/1099554.1099615},
	Isbn = {1-59593-140-6},
	Keywords = {Deduplication,Record Linkage},
	Location = {Bremen, Germany},
	Pages = {257--258},
	Publisher = {ACM},
	Title = {Joint deduplication of multiple record types in relational data},
	Year = {2005},
	Bdsk-Url-1 = {http://doi.acm.org/10.1145/1099554.1099615}}

@inproceedings{1193341,
	Abstract = {The main disadvantage of most existing set kernels is that they are based on averaging, which might be inappropriate for problems where only specific elements of the two sets should determine the overall similarity. In this paper we propose a class of kernels for sets of vectors directly exploiting set distance measures and, hence, incorporating various semantics into set kernels and lending the power of regularization to learning in structural domains where natural distance functions exist. These kernels belong to two groups: (i) kernels in the proximity space induced by set distances and (ii) set distance substitution kernels (non-PSD in general). We report experimental results which show that our kernels compare favorably with kernels based on averaging and achieve results similar to other state-of-the-art methods. At the same time our kernels systematically improve over the naive way of exploiting distances.},
	Address = {Washington, DC, USA},
	Author = {Adam Woznica and Alexandros Kalousis and Melanie Hilario},
	Booktitle = {ICDM '06: Proceedings of the Sixth International Conference on Data Mining},
	Date-Added = {2008-06-08 16:04:31 +0100},
	Date-Modified = {2008-06-08 16:05:03 +0100},
	Doi = {10.1109/ICDM.2006.60},
	Isbn = {0-7695-2701-9},
	Keywords = {Distance,Kernels,Structured Data},
	Pages = {1151--1156},
	Publisher = {IEEE Computer Society},
	Title = {Distances and (Indefinite) Kernels for Sets of Objects},
	Year = {2006},
	Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDM.2006.60}}

@inproceedings{1273626,
	Abstract = {The k-Nearest Neighbors algorithm can be easily adapted to classify complex objects (e.g. sets, graphs) as long as a proper dissimilarity function is given over an input space. Both the representation of the learning instances and the dissimilarity employed on that representation should be determined on the basis of domain knowledge. However, even in the presence of domain knowledge, it can be far from obvious which complex representation should be used or which dissimilarity should be applied on the chosen representation. In this paper we present a framework that allows one to combine different complex representations of a given learning problem and/or different dissimilarities defined on these representations. We build on ideas developed previously on metric learning for vectorial data. We demonstrate the utility of our method in domains in which the learning instances are represented as sets of vectors by learning how to combine different set distance measures.},
	Address = {New York, NY, USA},
	Author = {Adam Woznica and Alexandros Kalousis and Melanie Hilario},
	Booktitle = {ICML '07: Proceedings of the 24th international conference on Machine learning},
	Date-Added = {2008-06-08 16:01:44 +0100},
	Date-Modified = {2008-06-08 16:02:24 +0100},
	Doi = {10.1145/1273496.1273626},
	Isbn = {978-1-59593-793-3},
	Keywords = {Distances,Kernels,Structured Data},
	Location = {Corvallis, Oregon},
	Pages = {1031--1038},
	Publisher = {ACM},
	Title = {Learning to combine distances for complex representations},
	Year = {2007},
	Bdsk-Url-1 = {http://doi.acm.org/10.1145/1273496.1273626}}

@inproceedings{DBLP:conf/ilp/Nienhuys-Cheng97,
	Abstract = {We can use a metric to measure the differences between elements in a domain or subsets of that domain (i.e. concepts). Which particular metric should be chosen depends on the kind of difference we want to measure. The well-known Euclidean metric on $\mathbb{R}^n$ and its generalizations are often used for this purpose, but such metrics are not always suitable for concepts where elements have some structure different from real numbers. For example, in (Inductive) Logic Programming a concept is often expressed as an Herbrand interpretation of some first-order language. Every element in an Herbrand interpretation is a ground atom which has a tree structure. We start by defining a metric $d$ on the set of expressions (ground atoms and ground terms), motivated by the structure and complexity of the expressions and the symbols used therein. This metric induces the Hausdorff metric $h$ on the set of all sets of ground atoms, which allows us to measure the distance between Herbrand interpretations. We then give some necessary and some sufficient conditions for an upper bound of $h$ between two given Herbrand interpretations, by considering the elements in their symmetric difference.},
	Author = {Shan-Hwei Nienhuys-Cheng},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/1997},
	Date-Added = {2008-06-03 15:51:31 +0100},
	Date-Modified = {2008-06-03 15:52:23 +0100},
	Keywords = {Inductive Logic Programming,Distances},
	Pages = {213-226},
	Title = {Distance Between Herbrand Interpretations: A Measure for Approximations to a Target Concept},
	Year = {1997}}

@proceedings{DBLP:conf/ilp/1997,
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Date-Added = {2008-06-03 15:50:32 +0100},
	Date-Modified = {2008-06-03 15:50:53 +0100},
	Editor = {Nada Lavrac and Saso Dzeroski},
	Isbn = {3-540-63514-9},
	Keywords = {Distances},
	Publisher = {Springer},
	Series = {Lecture Notes in Computer Science},
	Title = {Inductive Logic Programming, 7th International Workshop, ILP-97, Prague, Czech Republic, September 17-20, 1997, Proceedings},
	Volume = {1297},
	Year = {1997}}

@inproceedings{DBLP:conf/ilp/Sebag97,
	Abstract = {A distance on the problem domain allows one to tackle some typical goals of machine learning, e.g. classification or conceptual clustering, via robust data analysis algorithms (e.g. k-nearest neighbors or k-means).
A method for building a distance on first-order logic domains is presented in this paper. The distance is constructed from examples expressed as definite or constrained clauses, via a two-step process: a set of $d$ hypotheses is first learnt from the training examples. These hypotheses serve as new descriptors of the problem domain $\mathcal{L}_h$: they induce a mapping $\pi$ from $\mathcal{L}_h$ onto the space of integers $\mathbb{N}^d$. The distance between any two examples $E$ and $F$ is finally defined as the Euclidean distance between $\pi(E)$ and $\pi(F)$. The granularity of this hypothesis-driven distance (HDD) is controlled via the user-supplied parameter $d$.
The relevance of an HDD is evaluated from the predictive accuracy of the k-NN classifier based on this distance. Preliminary experiments demonstrate the potentialities of distance induction, in terms of predictive accuracy, computational cost, and tolerance to noise.},
	Author = {Mich{\`e}le Sebag},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/1997},
	Date-Added = {2008-06-03 15:38:48 +0100},
	Date-Modified = {2008-06-03 15:41:34 +0100},
	Keywords = {Distances,Inductive Logic Programming},
	Pages = {264-272},
	Title = {Distance Induction in First Order Logic},
	Year = {1997}}

@inproceedings{DBLP:conf/ilp/BohnebeckHW98,
	Abstract = {The similarity measures used in first-order IBL so far have been limited to the function-free case. In this paper we show that a lot of predictive power can be gained by allowing lists and other terms in the input representation and designing similarity measures that work directly on these structures. We present an improved similarity measure for the first-order instance based learner Ribl that employs the concept of edit distances to efficiently compute distances between lists and terms, discuss its computational and formal properties, and show that it is empirically superior by a wide margin on a problem from the domain of biochemistry.},
	Author = {Uta Bohnebeck and Tam{\'a}s Horv{\'a}th and Stefan Wrobel},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/1998},
	Date-Added = {2008-06-03 15:26:45 +0100},
	Date-Modified = {2008-06-03 15:27:43 +0100},
	Keywords = {Inductive Logic Programming,Distances,Structured Data},
	Pages = {65-79},
	Title = {Term Comparisons in First-Order Similarity Measures},
	Year = {1998}}

@inproceedings{DBLP:conf/ilp/EstruchFHR05,
	Abstract = {Many distance-based methods in machine learning are able to identify similar cases or prototypes from which decisions can be made. The explanation given is usually based on expressions such as ``because case a is similar to case b''. However, a more general or meaningful pattern, such as ``because case a has properties x and y (as b has)'' is usually more difficult to find. Even in this case, the connection of this pattern with the original distance-based method is generally unclear, or even inconsistent. In this paper, we study the connection between the concept of distance (or similarity) and the concept of generalisation. More precisely, we define several conditions which, in our view, a sensible distance-based generalisation must have. From that, we are able to tell whether a generalisation operator for a pattern representation language is consistent with the metric space defined by the underlying distance. We show that there are pattern languages and generalisation operators which comply with these properties for typical data types: nominal, numerical, sets and lists. We also show the relationship between the well-known concepts of lgg and distances between terms, and the definition of generalisation presented in this paper.},
	Author = {Vicent Estruch and C{\'e}sar Ferri and Jos{\'e} Hern{\'a}ndez-Orallo and M. Jos{\'e} Ram\'{\i}rez-Quintana},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/2005},
	Date-Added = {2008-06-03 15:11:58 +0100},
	Date-Modified = {2008-06-03 15:13:37 +0100},
	Ee = {http://dx.doi.org/10.1007/11536314_6},
	Keywords = {Distances,Inductive Logic Programming},
	Pages = {87-102},
	Title = {Distance Based Generalisation},
	Year = {2005}}

@proceedings{DBLP:conf/ilp/2005,
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Date-Added = {2008-06-03 15:11:58 +0100},
	Date-Modified = {2008-06-03 15:11:58 +0100},
	Editor = {Stefan Kramer and Bernhard Pfahringer},
	Isbn = {3-540-28177-0},
	Keywords = {Distances},
	Publisher = {Springer},
	Series = {Lecture Notes in Computer Science},
	Title = {Inductive Logic Programming, 15th International Conference, ILP 2005, Bonn, Germany, August 10-13, 2005, Proceedings},
	Volume = {3625},
	Year = {2005}}

@inproceedings{DBLP:conf/ilp/EstruchFHR06,
	Abstract = {Distance-based methods have been a successful family of machine learning techniques since the inception of the discipline. Basically, the classification or clustering of a new individual is determined by the distance to one or more prototypes. From a comprehensibility point of view, this is not especially problematic in propositional learning where prototypes can be regarded as a good generalisation (pattern) of a group of elements. However, for scenarios with structured data, this is no longer the case. In recent work, we developed a framework to determine whether a pattern computed by a generalisation operator is consistent w.r.t. a distance. In this way, we can determine which patterns can provide a good representation of a group of individuals belonging to a metric space. In this work, we apply this framework to analyse and define minimal distance-based generalisation operators (mg operators) for first-order data. We show that Plotkin's lgg is a mg operator for atoms under the distance introduced by J. Ramon, M. Bruynooghe and W. Van Laer. We also show that this is not the case for clauses with the distance introduced by J. Ramon and M. Bruynooghe. Consequently, we introduce a new mg operator for clauses, which could be used as a base to adapt existing bottom-up methods in ILP.
},
	Author = {Vicent Estruch and C{\'e}sar Ferri and Jos{\'e} Hern{\'a}ndez-Orallo and M. Jos{\'e} Ram\'{\i}rez-Quintana},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/2006},
	Date-Added = {2008-06-03 15:07:12 +0100},
	Date-Modified = {2008-06-03 15:14:01 +0100},
	Ee = {http://dx.doi.org/10.1007/978-3-540-73847-3_21},
	Keywords = {Distances,Inductive Logic Programming},
	Pages = {169-183},
	Title = {Minimal Distance-Based Generalisation Operators for First-Order Objects},
	Year = {2006}}

@proceedings{DBLP:conf/ilp/2006,
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Date-Added = {2008-06-03 15:07:12 +0100},
	Date-Modified = {2008-06-03 15:07:12 +0100},
	Editor = {Stephen Muggleton and Ram{\'o}n P. Otero and Alireza Tamaddoni-Nezhad},
	Isbn = {978-3-540-73846-6},
	Keywords = {Distances},
	Publisher = {Springer},
	Series = {Lecture Notes in Computer Science},
	Title = {Inductive Logic Programming, 16th International Conference, ILP 2006, Santiago de Compostela, Spain, August 24-27, 2006, Revised Selected Papers},
	Volume = {4455},
	Year = {2007}}

@inproceedings{DBLP:conf/ilp/dAmatoFE07,
	Abstract = {Several activities related to semantically annotated resources can be enabled by a notion of similarity, spanning from clustering to retrieval, matchmaking and other forms of inductive reasoning. We propose the definition of a family of semi-distances over the set of objects in a knowledge base which can be used in these activities. In the line of works on distance-induction on clausal spaces, the family is parameterized on a committee of concepts expressed with clauses. Hence, we also present a method based on the idea of simulated annealing to be used to optimize the choice of the best concept committee.},
	Author = {Claudia d'Amato and Nicola Fanizzi and Floriana Esposito},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/2007},
	Date-Added = {2008-06-03 14:48:54 +0100},
	Date-Modified = {2008-06-03 14:56:13 +0100},
	Ee = {http://dx.doi.org/10.1007/978-3-540-78469-2_7},
	Keywords = {Distances,Structured Data,Semantic Integration},
	Pages = {29-38},
	Title = {Induction of Optimal Semantic Semi-distances for Clausal Knowledge Bases},
	Year = {2007}}

@proceedings{DBLP:conf/ilp/2007,
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Date-Added = {2008-06-03 14:48:54 +0100},
	Date-Modified = {2008-06-03 14:55:40 +0100},
	Editor = {Hendrik Blockeel and Jan Ramon and Jude W. Shavlik and Prasad Tadepalli},
	Isbn = {978-3-540-78468-5},
	Publisher = {Springer},
	Series = {Lecture Notes in Computer Science},
	Title = {Inductive Logic Programming, 17th International Conference, ILP 2007, Corvallis, OR, USA, June 19-21, 2007, Revised Selected Papers},
	Volume = {4894},
	Year = {2008}}

@inproceedings{DBLP:conf/ilp/KirstenW00,
	Abstract = {In this paper, we present an in-depth evaluation of two approaches to extending k-means clustering to work on first-order representations. The first approach, k-medoids, selects its cluster center from the given set of instances, and is thus limited in its choice of centers. The second approach, k-prototypes, uses a heuristic prototype construction algorithm that is capable of generating new centers. The two approaches are empirically evaluated on a standard benchmark problem with respect to clustering quality and convergence. Results show that in this case indeed the k-medoids approach is a viable and fast alternative to existing agglomerative or top-down clustering approaches even for a small-scale dataset, while k-prototypes exhibited a number of deficiencies.},
	Author = {Mathias Kirsten and Stefan Wrobel},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/2000},
	Date-Added = {2008-06-03 14:45:27 +0100},
	Date-Modified = {2008-06-03 14:46:24 +0100},
	Ee = {http://link.springer.de/link/service/series/0558/bibs/1866/18660112.htm},
	Keywords = {Distances,Structured Data},
	Pages = {112-129},
	Title = {Extending K-Means Clustering to First-Order Representations},
	Year = {2000}}

@proceedings{DBLP:conf/ilp/2000,
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Date-Added = {2008-06-03 14:45:27 +0100},
	Date-Modified = {2008-06-03 14:45:27 +0100},
	Editor = {James Cussens and Alan M. Frisch},
	Isbn = {3-540-67795-X},
	Keywords = {Distances},
	Publisher = {Springer},
	Series = {Lecture Notes in Computer Science},
	Title = {Inductive Logic Programming, 10th International Conference, ILP 2000, London, UK, July 24-27, 2000, Proceedings},
	Volume = {1866},
	Year = {2000}}

@article{bergamaschi99semantic,
	Abstract = {This paper describes the MOMIS [4, 5] (Mediator envirOnment for Multiple Information Sources) approach to the integration and query of multiple, heterogeneous information sources, containing structured and semistructured data. MOMIS has been conceived as a joint collaboration between the University of Milano and Modena in the framework of the INTERDATA national research project, aiming at providing methods and tools for data management in Internet-based information systems. Like other...},
	Author = {Sonia Bergamaschi and Silvana Castano and Maurizio Vincini},
	Date-Added = {2008-06-03 14:32:48 +0100},
	Date-Modified = {2008-06-03 14:33:38 +0100},
	Journal = {SIGMOD Record},
	Keywords = {Distances},
	Number = {1},
	Pages = {54-59},
	Title = {Semantic Integration of Semistructured and Structured Data Sources},
	Url = {http://citeseer.ist.psu.edu/bergamaschi99semantic.html},
	Volume = {28},
	Year = {1999},
	Bdsk-Url-1 = {http://citeseer.ist.psu.edu/bergamaschi99semantic.html}}

@inproceedings{DBLP:conf/ilp/Sebag98,
	Abstract = {This paper continues a previous work using stochastic heuristics to extract and exploit knowledge with no size restrictions, with polynomial complexity.
A simplified relational framework is described; within this framework, one basic learning component, the generalization operator, is reconsidered.
Stochastic heuristics are combined with Plotkin's least general generalization to derive a stochastic generalization operator and a simple stochastic similarity function with controllable complexity. Preliminary experiments on the well-studied mutagenesis problem (regression-friendly and regression-unfriendly datasets) demonstrate the potential and the limitations of this similarity.
},
	Author = {Mich{\`e}le Sebag},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/1998},
	Date-Added = {2008-06-03 14:14:32 +0100},
	Date-Modified = {2008-06-03 15:38:09 +0100},
	Keywords = {Inductive Logic Programming,Distances,Structured Data},
	Pages = {95-105},
	Title = {A Stochastic Simple Similarity},
	Year = {1998}}

@inproceedings{DBLP:conf/ilp/RamonB98,
	Abstract = {Several learning systems, such as systems based on clustering and instance based learning, use a measure of distance between objects. Good measures of distance exist when objects are described by a fixed set of attributes as in attribute value learners. More recent learning systems however, use a first order logic representation. These systems represent objects as models or clauses. This paper develops a general framework for distances between such objects and reports a preliminary evaluation.},
	Author = {Jan Ramon and Maurice Bruynooghe},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/1998},
	Date-Added = {2008-06-03 14:11:06 +0100},
	Date-Modified = {2008-06-03 14:11:59 +0100},
	Keywords = {Inductive Logic Programming,Distances,Structured Data},
	Pages = {271-280},
	Title = {A Framework for Defining Distances Between First-Order Logic Objects},
	Year = {1998}}

@inproceedings{DBLP:conf/ilp/Nienhuys-Cheng98,
	Abstract = {A notion of distances between Herbrand interpretations enables us to measure how well a certain program, learned from examples, approximates some target program. The distance introduced in [10] has the disadvantage that it does not fit the notion of ``identification in the limit''. We use a distance defined by a level mapping [5] to overcome this problem, and study in particular the mapping $T_\Pi$ induced by a definite program $\Pi$ on the metric space. Continuity of $T_\Pi$ holds under certain conditions, and we give a concrete level mapping that satisfies these conditions, based on [10]. This allows us to prove the existence of fixed points without using the Banach Fixed Point Theorem.
},
	Author = {Shan-Hwei Nienhuys-Cheng},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/1998},
	Date-Added = {2008-06-03 13:58:13 +0100},
	Date-Modified = {2008-06-03 14:07:25 +0100},
	Keywords = {Inductive Logic Programming,Distances,Structured Data},
	Pages = {250-260},
	Title = {Distances and Limits on Herbrand Interpretations},
	Year = {1998}}

@inproceedings{DBLP:conf/ilp/KirstenW98,
	Abstract = {Work on first-order clustering has primarily been focused on the task of conceptual clustering, i.e., forming clusters with symbolic generalizations in the given representation language. By contrast, for propositional representations, experience has shown that simple algorithms based exclusively on distance measures can often outperform their concept-based counterparts. In this paper, we therefore build on recent advances in the area of first-order distance metrics and present RDBC, a bottom-up agglomerative clustering algorithm for first-order representations that relies on distance information only and features a novel parameter-free pruning measure for selecting the final clustering from the cluster tree. The algorithm can empirically be shown to produce good clusterings (on the mutagenesis domain) that, when used for subsequent prediction tasks, improve on previous clustering results and approach the accuracies of dedicated predictive learners.},
	Author = {Mathias Kirsten and Stefan Wrobel},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Crossref = {DBLP:conf/ilp/1998},
	Date-Added = {2008-06-03 13:40:08 +0100},
	Date-Modified = {2008-06-03 14:09:27 +0100},
	Keywords = {Distances,Structured Data},
	Pages = {261-270},
	Title = {Relational Distance-Based Clustering},
	Year = {1998}}

@proceedings{DBLP:conf/ilp/1998,
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ILP},
	Date-Added = {2008-06-03 13:40:08 +0100},
	Date-Modified = {2008-06-03 13:40:08 +0100},
	Editor = {David Page},
	Isbn = {3-540-64738-4},
	Keywords = {Inductive Logic Programming},
	Publisher = {Springer},
	Series = {Lecture Notes in Computer Science},
	Title = {Inductive Logic Programming, 8th International Workshop, ILP-98, Madison, Wisconsin, USA, July 22-24, 1998, Proceedings},
	Volume = {1446},
	Year = {1998}}

@inproceedings{Elias2005,
	Abstract = {This paper introduces Higher-Order Bayesian Networks, a probabilistic reasoning formalism which combines the efficient reasoning mechanisms of Bayesian Networks with the expressive power of higher-order logics. We discuss how the proposed graphical model is used in order to define a probability distribution semantics over particular families of higher-order terms. We give an example of the application of our method on the Mutagenesis domain, a popular dataset from the Inductive Logic Programming community, showing how we employ probabilistic inference and model learning for the construction of a probabilistic classifier based on Higher-Order Bayesian Networks.},
	Abstract-Url = {http://www.cs.bris.ac.uk/Publications/pub_master.jsp?id=2000553},
	Author = {Elias Gyftodimos and Peter A. Flach},
	Booktitle = {Proceedings of the 6th International Symposium on Intelligent Data Analysis (IDA 2005)},
	Date-Added = {2008-04-07 14:07:41 +0100},
	Date-Modified = {2008-04-07 14:12:21 +0100},
	Isbn = {3-540-28795-7},
	Keywords = {Structured Data,Higher Order Logic},
	Month = {September},
	Pages = {145--157},
	Publisher = {Springer-Verlag},
	Pubtype = {102},
	Rating = {4},
	Read = {Yes},
	Title = {Combining Bayesian Networks with Higher-Order Data Representations},
	Url = {http://www.cs.bris.ac.uk/Publications/Papers/2000553.pdf},
	Year = {2005},
	Bdsk-Url-1 = {http://www.cs.bris.ac.uk/Publications/Papers/2000553.pdf}}

@book{Date1991,
	Address = {Boston, MA, USA},
	Author = {C. J. Date},
	Date-Added = {2008-04-06 18:16:40 +0100},
	Date-Modified = {2008-04-06 18:18:18 +0100},
	Isbn = {0201556510},
	Keywords = {Relational Database},
	Publisher = {Addison-Wesley Longman Publishing Co., Inc.},
	Rating = {4},
	Read = {Yes},
	Title = {An Introduction to Database Systems},
	Year = {1991}}

@book{Codd1990,
	Author = {E. F. Codd},
	Date-Added = {2008-02-03 21:19:21 +0000},
	Date-Modified = {2008-04-06 18:28:18 +0100},
	Keywords = {Relational Database},
	Publisher = {Addison Wesley},
	Title = {The Relational Model for Database Management, Version 2},
	Year = {1990}}

@techreport{codd1969dr,
	Address = {San Jose},
	Author = {E. F. Codd},
	Date-Added = {2008-02-03 17:48:01 +0000},
	Date-Modified = {2008-02-03 17:48:01 +0000},
	Institution = {IBM},
	Type = {IBM Research Report},
	Keywords = {Relational Database},
	Number = {RJ599},
	Title = {Derivability, Redundancy, and Consistency of Relations stored in Large Data Banks},
	Year = {1969}}

@article{church1940,
	Abstract = {The purpose of the present paper is to give a formulation of the simple theory of types which incorporates certain features of the calculus of $\lambda$-conversion. A complete incorporation of the calculus of $\lambda$-conversion into the theory of types is impossible if we require that $\lambda x$ and juxtaposition shall retain their respective meanings as an abstraction operator and as denoting the application of function to argument. But the present partial incorporation has certain advantages from the point of view of type theory and is offered as being of interest on this basis (whatever may be thought of the finally satisfactory character of the theory of types as a foundation for logic and mathematics). For features of the formulation which are not immediately connected with the incorporation of $\lambda$-conversion, we are heavily indebted to Whitehead and Russell, Hilbert and Ackermann, Hilbert and Bernays, and to forerunners of these, as the reader familiar with the works in question will recognize.},
	Author = {Alonzo Church},
	Date-Added = {2008-01-13 21:51:43 +0000},
	Date-Modified = {2008-01-13 22:01:11 +0000},
	Journal = {Journal of Symbolic Logic},
	Keywords = {Higher Order Logic},
	Month = {June},
	Number = {2},
	Pages = {56--68},
	Title = {A Formulation of the Simple Theory of Types},
	Url = {http://www.jstor.org/view/00224812/di985045/98p0566g/0},
	Volume = {5},
	Year = {1940},
	Bdsk-Url-1 = {http://www.jstor.org/view/00224812/di985045/98p0566g/0}}

@inproceedings{passerini05kernelsprolog,
	Abstract = {We describe a family of kernels over untyped and typed Prolog ground terms and show that they can be applied for learning in structured domains, presenting experimental results in a QSPR task.},
	Author = {Andrea Passerini and Paolo Frasconi},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference on Artificial Intelligence, Edinburgh, Scotland, UK, July 30-August 5, 2005},
	Crossref = {DBLP:conf/ijcai/2005},
	Date-Added = {2007-10-20 17:04:36 +0100},
	Date-Modified = {2007-10-20 17:08:31 +0100},
	Editor = {Leslie Pack Kaelbling and Alessandro Saffiotti},
	Ee = {http://www.ijcai.org/papers/post-0228.pdf},
	Keywords = {Structured Data,Kernels},
	Pages = {1626-1627},
	Rating = {1},
	Read = {Yes},
	Title = {Kernels on Prolog Ground Terms},
	Year = {2005}}

@book{Lloyd2003bh,
	Address = {Secaucus, NJ, USA},
	Author = {John W. Lloyd},
	Date-Added = {2006-09-03 14:27:44 +0100},
	Date-Modified = {2006-09-03 14:31:10 +0100},
	Isbn = {3540420274},
	Keywords = {Structured Data},
	Publisher = {Springer-Verlag New York, Inc.},
	Title = {Logic and Learning},
	Year = {2003}}

@techreport{Euzenat2004qf,
	Abstract = {In this document we provide an overall view of the state of the art in ontology alignment. It is organised as a description of the need for ontology alignment, a presentation of the techniques currently in use for ontology alignment, and a presentation of existing systems. The state of the art is not restricted to any discipline and considers, for instance, the work on schema matching within the database area as a form of ontology alignment.},
	Author = {Jerome Euzenat and Thanh Le Bach and Jesus Barrasa and Paolo Bouquet and Jan De Bo and Rose Dieng and Marc Ehrig and Manfred Hauswirth and Mustafa Jarrar and Ruben Lara and Diana Maynard and Amedeo Napoli and Giorgos Stamou and Heiner Stuckenschmidt and Pavel Shvaiko and Sergio Tessaris and Sven Van Acker and Ilya Zaihrayeu},
	Date-Added = {2006-09-02 22:10:22 +0100},
	Date-Modified = {2006-09-02 22:22:25 +0100},
	Institution = {INRIA},
	Keywords = {Ontology,Schema Matching},
	Title = {State of the art on ontology alignment},
	Type = {KWEB EU-IST-2004-507482 Project Report},
	Url = {http://www.starlab.vub.ac.be/research/projects/knowledgeweb/kweb-223.pdf},
	Year = {2004},
	Bdsk-Url-1 = {http://www.starlab.vub.ac.be/research/projects/knowledgeweb/kweb-223.pdf}}

@inproceedings{Enrique-Alfonseca2002ly,
	Abstract = {We describe here a procedure to combine two different existing techniques for Ontology Enrichment with domain-specific concepts. The resulting algorithm is fully unsupervised, and the level of precision is higher than when they are used separately, so we believe that both algorithms benefit from each other. The experiments have been performed by extending WordNet with concepts extracted from The Lord of the Rings.},
	Address = {Las Palmas},
	Author = {Enrique Alfonseca and Suresh Manandhar},
	Booktitle = {Language Resources and Evaluation (LREC-2002)},
	Date-Added = {2006-09-02 16:46:14 +0100},
	Date-Modified = {2006-09-02 16:47:11 +0100},
	Keywords = {Ontology},
	Title = {Improving an Ontology Refinement Method with Hyponymy Patterns},
	Url = {http://citeseer.ist.psu.edu/alfonseca02improving.html},
	Year = {2002},
	Bdsk-Url-1 = {http://citeseer.ist.psu.edu/alfonseca02improving.html}}

@article{Navigli2004zr,
	Abstract = {We present a method and a tool, OntoLearn, aimed at the extraction of domain ontologies from Web sites, and more generally from documents shared among the members of virtual organizations. OntoLearn first extracts a domain terminology from available documents. Then, complex domain terms are semantically interpreted and arranged in a hierarchical fashion. Finally, a general-purpose ontology, WordNet, is trimmed and enriched with the detected domain concepts. The major novel aspect of this approach is semantic interpretation, that is, the association of a complex concept with a complex term. This involves finding the appropriate WordNet concept for each word of a terminological string and the appropriate conceptual relations that hold among the concept components. Semantic interpretation is based on a new word sense disambiguation algorithm, called structural semantic interconnections.},
	Address = {Cambridge, MA, USA},
	Author = {Roberto Navigli and Paola Velardi},
	Date-Added = {2006-09-02 16:41:41 +0100},
	Date-Modified = {2006-09-02 16:42:13 +0100},
	Doi = {10.1162/089120104323093276},
	Issn = {0891-2017},
	Journal = {Computational Linguistics},
	Keywords = {Ontology},
	Number = {2},
	Pages = {151--179},
	Publisher = {MIT Press},
	Title = {Learning Domain Ontologies from Document Warehouses and Dedicated Web Sites},
	Volume = {30},
	Year = {2004},
	Bdsk-Url-1 = {http://dx.doi.org/10.1162/089120104323093276}}

@url{Library-Associations2003vn,
	Author = {International Federation of Library Associations and Institutions},
	Date-Added = {2006-09-02 12:28:42 +0100},
	Date-Modified = {2006-09-02 16:05:17 +0100},
	Lastchecked = {2006},
	Title = {IFLANET: Digital Libraries Metadata Resources},
	Url = {http://www.ifla.org/II/metadata.htm},
	Year = {2003},
	Bdsk-Url-1 = {http://www.ifla.org/II/metadata.htm}}

@article{Doan2004uq,
	Abstract = {Semantic heterogeneity is one of the key challenges in integrating and sharing data across disparate sources, data exchange and migration, data warehousing, model management, the Semantic Web and peer-to-peer databases. Semantic heterogeneity can arise at the schema level and at the data level. At the schema level, sources can differ in relations, attribute and tag names, data normalization, levels of detail, and the coverage of a particular domain. The problem of reconciling schema-level heterogeneity is often referred to as schema matching or schema mapping. At the data level, we find different representations of the same real-world entities (e.g., people, companies, publications, etc.). Reconciling data-level heterogeneity is referred to as data deduplication, record linkage, and entity/object matching. To exacerbate the heterogeneity challenges, schema elements of one source can be represented as data in another. This special issue presents a set of articles that describe recent work on semantic heterogeneity at the schema level.},
	Address = {New York, NY, USA},
	Author = {AnHai Doan and Natalya F. Noy and Alon Y. Halevy},
	Date-Added = {2006-08-29 13:18:25 +0100},
	Date-Modified = {2006-08-29 13:18:54 +0100},
	Doi = {10.1145/1041410.1041412},
	Issn = {0163-5808},
	Journal = {SIGMOD Record},
	Keywords = {Semantic Integration},
	Number = {4},
	Pages = {11--13},
	Publisher = {ACM Press},
	Title = {Introduction to the special issue on semantic integration},
	Volume = {33},
	Year = {2004},
	Bdsk-Url-1 = {http://doi.acm.org/10.1145/1041410.1041412}}

@manual{Bray2006fk,
	Abstract = {The Extensible Markup Language (XML) is a subset of SGML that is completely described in this document. Its goal is to enable generic SGML to be served, received, and processed on the Web in the way that is now possible with HTML. XML has been designed for ease of implementation and for interoperability with both SGML and HTML.},
	Author = {Tim Bray and Jean Paoli and C. M. Sperberg-McQueen and Eve Maler and Fran{\c c}ois Yergeau},
	Date-Added = {2006-08-29 12:03:15 +0100},
	Date-Modified = {2006-08-29 12:16:22 +0100},
	Keywords = {XML,Technical Specification},
	Month = {August},
	Organization = {{W3C}},
	Title = {{E}xtensible {M}arkup {L}anguage ({XML}) 1.0 (Fourth Edition)},
	Url = {http://www.w3.org/TR/xml},
	Year = {2006},
	Bdsk-Url-1 = {http://www.w3.org/TR/xml}}

@inproceedings{Noy2002qf,
	Abstract = {The appearance of a large number of ontology tools may leave a user looking for an appropriate tool overwhelmed and uncertain on which tool to choose. Thus evaluation and comparison of these tools is important to help users determine which tool is best suited for their tasks. However, there is no "one size fits all" comparison framework for ontology tools: different classes of tools require very different comparison frameworks. For example, ontology-development tools can easily be compared to one another since they all serve the same task: define concepts, instances, and relations in a domain. Tools for ontology merging, mapping, and alignment however are so different from one another that direct comparison may not be possible. They differ in the type of input they require (e.g., instance data or no instance data), the type of output they produce (e.g., one merged ontology, pairs of related terms, articulation rules), modes of interaction and so on. This diversity makes comparing the performance of mapping tools to one another largely meaningless. We present criteria that partition the set of such tools in smaller groups allowing users to choose the set of tools that best fits their tasks. We discuss what resources we as a community need to develop in order to make performance comparisons within each group of merging and mapping tools useful and effective. These resources will most likely come as results of evaluation experiments of stand-alone tools. As an example of such an experiment, we discuss our experiences and results in evaluating PROMPT, an interactive ontology-merging tool. Our experiment produced some of the resources that we can use in more general evaluation. However, it has also shown that comparing the performance of different tools can be difficult since human experts do not agree on how ontologies should be merged, and we do not yet have a good enough metric for comparing ontologies.},
	Author = {N. F. Noy and M. A. Musen},
	Booktitle = {Workshop on Evaluation of Ontology Tools at EKAW'02 (EON2002)},
	Date-Added = {2006-08-28 22:33:41 +0100},
	Date-Modified = {2006-08-28 22:36:46 +0100},
	Keywords = {Ontology},
	Title = {Evaluating Ontology-Mapping Tools: Requirements and Experience},
	Url = {http://smi-web.stanford.edu/auslese/smi-web/reports/SMI-2002-0936.pdf},
	Year = {2002},
	Bdsk-Url-1 = {http://smi-web.stanford.edu/auslese/smi-web/reports/SMI-2002-0936.pdf}}

@article{Doan2003ve,
	Abstract = {On the Semantic Web, data will inevitably come from many different ontologies, and information processing across ontologies is not possible without knowing the semantic mappings between them. Manually finding such mappings is tedious, error-prone, and clearly not possible on the Web scale. Hence the development of tools to assist in the ontology mapping process is crucial to the success of the Semantic Web. We describe GLUE, a system that employs machine learning techniques to find such mappings. Given two ontologies, for each concept in one ontology GLUE finds the most similar concept in the other ontology. We give well-founded probabilistic definitions to several practical similarity measures and show that GLUE can work with all of them. Another key feature of GLUE is that it uses multiple learning strategies, each of which exploits well a different type of information either in the data instances or in the taxonomic structure of the ontologies. To further improve matching accuracy, we extend GLUE to incorporate commonsense knowledge and domain constraints into the matching process. Our approach is thus distinguished in that it works with a variety of well-defined similarity notions and that it efficiently incorporates multiple types of knowledge. We describe a set of experiments on several real-world domains and show that GLUE proposes highly accurate semantic mappings. Finally, we extend GLUE to find complex mappings between ontologies and describe experiments that show the promise of the approach.},
	Address = {Secaucus, NJ, USA},
	Author = {AnHai Doan and Jayant Madhavan and Robin Dhamankar and Pedro Domingos and Alon Halevy},
	Date-Added = {2006-08-28 22:20:07 +0100},
	Date-Modified = {2006-08-28 22:23:25 +0100},
	Doi = {10.1007/s00778-003-0104-2},
	Issn = {1066-8888},
	Journal = {The VLDB Journal},
	Keywords = {Ontology},
	Month = {November},
	Number = {4},
	Pages = {303--319},
	Publisher = {Springer-Verlag New York, Inc.},
	Title = {Learning to match ontologies on the {S}emantic {W}eb},
	Url = {http://www.springerlink.com/content/e897ajf7e9aeht3w/fulltext.pdf},
	Volume = {12},
	Year = {2003},
	Bdsk-Url-1 = {http://www.springerlink.com/content/e897ajf7e9aeht3w/fulltext.pdf},
	Bdsk-Url-2 = {http://dx.doi.org/10.1007/s00778-003-0104-2}}

@book{Sowa1999ly,
	Abstract = {Sowa integrates logic, philosophy, linguistics, and computer science into this study of knowledge and its various models and implementations. His definitive new book shows how techniques of artificial intelligence, database design, and object-oriented programming help make knowledge explicit in a form that computer systems can use.},
	Author = {John F. Sowa},
	Date-Added = {2006-08-28 12:41:44 +0100},
	Date-Modified = {2006-08-28 12:43:23 +0100},
	Howpublished = {Hardcover},
	Isbn = {0534949657},
	Keywords = {Ontology},
	Month = {August},
	Publisher = {Course Technology},
	Title = {Knowledge Representation: Logical, Philosophical, and Computational Foundations},
	Year = {1999}}

@article{Zhang2004kx,
	Abstract = {We investigate machine learning methods for automatically integrating objects from different taxonomies into a master taxonomy. This problem is not only currently pervasive on the Web, but is also important to the emerging Semantic Web. A straightforward approach to automating this process would be to build classifiers through machine learning and then use these classifiers to classify objects from the source taxonomies into categories of the master taxonomy. However, conventional machine learning algorithms totally ignore the availability of the source taxonomies. In fact, source and master taxonomies often have common categories under different names or other more complex semantic overlaps. We introduce two techniques that exploit the semantic overlap between the source and master taxonomies to build better classifiers for the master taxonomy. The first technique, Cluster Shrinkage, biases the learning algorithm against splitting source categories by making objects in the same category appear more similar to each other. The second technique, Co-Bootstrapping, tries to facilitate the exploitation of inter-taxonomy relationships by providing category indicator functions as additional features for the objects. Our experiments with real-world Web data show that these proposed add-on techniques can enhance various machine learning algorithms to achieve substantial improvements in performance for taxonomy integration.},
	Author = {Dell Zhang and Wee S. Lee},
	Citeulike-Article-Id = {149500},
	Date-Added = {2006-08-28 12:39:08 +0100},
	Date-Modified = {2006-08-30 13:27:27 +0100},
	Doi = {10.1016/j.websem.2004.10.001},
	Journal = {Web Semantics: Science, Services and Agents on the World Wide Web},
	Keywords = {Ontology},
	Month = {December},
	Number = {2},
	Pages = {131--151},
	Priority = {2},
	Title = {Learning to integrate web taxonomies},
	Url = {http://dx.doi.org/10.1016/j.websem.2004.10.001},
	Volume = {2},
	Year = {2004},
	Bdsk-Url-1 = {http://dx.doi.org/10.1016/j.websem.2004.10.001}}

@article{Noy2004zr,
	Abstract = {Semantic integration is an active area of research in several disciplines, such as databases, information integration, and ontologies. This paper provides a brief survey of the approaches to semantic integration developed by researchers in the ontology community. We focus on the approaches that differentiate the ontology research from other related areas. The goal of the paper is to provide a reader who may not be very familiar with ontology research with an introduction to the major themes in this research and with pointers to different research projects. We discuss techniques for finding correspondences between ontologies, declarative ways of representing these correspondences, and use of these correspondences in various semantic-integration tasks.},
	Address = {New York, NY, USA},
	Author = {Natalya F. Noy},
	Date-Added = {2006-08-28 12:28:16 +0100},
	Date-Modified = {2006-08-28 12:29:36 +0100},
	Doi = {10.1145/1041410.1041421},
	Issn = {0163-5808},
	Journal = {SIGMOD Record},
	Keywords = {Semantic Integration,Ontology},
	Month = {December},
	Number = {4},
	Pages = {65--70},
	Publisher = {ACM Press},
	Title = {Semantic integration: a survey of ontology-based approaches},
	Url = {http://www.sigmod.org/sigmod/record/issues/0412/13.natasha-10.pdf},
	Volume = {33},
	Year = {2004},
	Bdsk-Url-1 = {http://www.sigmod.org/sigmod/record/issues/0412/13.natasha-10.pdf},
	Bdsk-Url-2 = {http://doi.acm.org/10.1145/1041410.1041421}}

@article{Bernstein2004ys,
	Abstract = {Schema matching identifies elements of two given schemas that correspond to each other. Although there are many algorithms for schema matching, little has been written about building a system that can be used in practice. We describe our initial experience building such a system, a customizable schema matcher called Protoplasm.},
	Address = {New York, NY, USA},
	Author = {Philip A. Bernstein and Sergey Melnik and Michalis Petropoulos and Christoph Quix},
	Date-Added = {2006-08-28 12:21:49 +0100},
	Date-Modified = {2006-08-28 12:23:47 +0100},
	Doi = {10.1145/1041410.1041417},
	Issn = {0163-5808},
	Journal = {SIGMOD Record},
	Keywords = {Schema Matching},
	Month = {December},
	Number = {4},
	Pages = {38--43},
	Publisher = {ACM Press},
	Title = {Industrial-strength schema matching},
	Url = {http://www.sigmod.org/sigmod/record/issues/0412/9.phil-6.pdf},
	Volume = {33},
	Year = {2004},
	Bdsk-Url-1 = {http://www.sigmod.org/sigmod/record/issues/0412/9.phil-6.pdf},
	Bdsk-Url-2 = {http://doi.acm.org/10.1145/1041410.1041417}}

@inproceedings{Maedche2002vn,
	Abstract = {Ontologies now play an important role for many knowledge-intensive applications for which they provide a source of precisely defined terms. However, with their widespread usage there come problems concerning their proliferation. Ontology engineers or users frequently have a core ontology that they use, e.g., for browsing or querying data, but they need to extend it with, adapt it to, or compare it with the large set of other ontologies. For the task of detecting and retrieving relevant ontologies, one needs means for measuring the similarity between ontologies. We present a set of ontology similarity measures and a multiple-phase empirical evaluation.},
	Address = {London, UK},
	Author = {Alexander Maedche and Steffen Staab},
	Booktitle = {{EKAW} '02: Proceedings of the 13th International Conference on Knowledge Engineering and Knowledge Management. Ontologies and the Semantic Web},
	Date-Added = {2006-08-28 12:06:08 +0100},
	Date-Modified = {2006-08-28 12:10:14 +0100},
	Isbn = {3-540-44268-5},
	Keywords = {Ontology,Semantic Web},
	Pages = {251--263},
	Publisher = {Springer-Verlag},
	Title = {Measuring Similarity between Ontologies},
	Url = {http://www.aifb.uni-karlsruhe.de/~sst/Research/Publications/ekaw2002-compare.pdf},
	Year = {2002},
	Bdsk-Url-1 = {http://www.aifb.uni-karlsruhe.de/~sst/Research/Publications/ekaw2002-compare.pdf}}

@article{Rahm2001kx,
	Abstract = {Schema matching is a basic problem in many database application domains, such as data integration, E-business, data warehousing, and semantic query processing. In current implementations, schema matching is typically performed manually, which has significant limitations. On the other hand, previous research papers have proposed many techniques to achieve a partial automation of the match operation for specific application domains. We present a taxonomy that covers many of these existing approaches, and we describe the approaches in some detail. In particular, we distinguish between schema-level and instance-level, element-level and structure-level, and language-based and constraint-based matchers. Based on our classification we review some previous match implementations thereby indicating which part of the solution space they cover. We intend our taxonomy and review of past work to be useful when comparing different approaches to schema matching, when developing a new match algorithm, and when implementing a schema matching component.},
	Author = {Erhard Rahm and Philip A. Bernstein},
	Date-Added = {2006-08-28 12:01:58 +0100},
	Date-Modified = {2006-08-28 12:04:41 +0100},
	Journal = {The VLDB Journal},
	Keywords = {Ontology,Schema Matching},
	Month = {December},
	Number = {4},
	Pages = {334--350},
	Title = {A survey of approaches to automatic schema matching},
	Url = {http://www.springerlink.com/content/y3bavwk2t7328hat/fulltext.pdf},
	Volume = {10},
	Year = {2001},
	Bdsk-Url-1 = {http://www.springerlink.com/content/y3bavwk2t7328hat/fulltext.pdf}}

@book{Maedche2002uq,
	Author = {Alexander Maedche},
	Date-Added = {2006-08-28 11:56:04 +0100},
	Date-Modified = {2006-08-28 11:57:47 +0100},
	Keywords = {Ontology,Semantic Web},
	Publisher = {Kluwer Academic Publishers},
	Title = {Ontology Learning for the {S}emantic {W}eb},
	Year = {2002}}

@article{219748,
	Abstract = {Because meaningful sentences are composed of meaningful words, any system that hopes to process natural languages as people do must have information about words and their meanings. This information is traditionally provided through dictionaries, and machine-readable dictionaries are now widely available. But dictionary entries evolved for the convenience of human readers, not for machines. WordNet provides a more effective combination of traditional lexicographic information and modern computing. WordNet is an online lexical database designed for use under program control. English nouns, verbs, adjectives, and adverbs are organized into sets of synonyms, each representing a lexicalized concept. Semantic relations link the synonym sets.},
	Address = {New York, NY, USA},
	Author = {George A. Miller},
	Date-Added = {2006-08-28 11:50:36 +0100},
	Date-Modified = {2006-08-28 11:52:29 +0100},
	Doi = {10.1145/219717.219748},
	Issn = {0001-0782},
	Journal = {Communications of the ACM},
	Keywords = {Ontology},
	Number = {11},
	Pages = {39--41},
	Publisher = {ACM Press},
	Title = {{WordNet}: a lexical database for {E}nglish},
	Volume = {38},
	Year = {1995},
	Bdsk-Url-1 = {http://doi.acm.org/10.1145/219717.219748}}

@article{Maedche2001fk,
	Abstract = {The Semantic Web relies heavily on the formal ontologies that structure its underlying data for comprehensive and transportable machine understanding. Ontology learning greatly facilitates the construction of ontologies. The authors' view of ontology learning includes a number of complementary disciplines that feed on different types of unstructured, semistructured, and fully structured data to support semiautomatic, cooperative ontology engineering. In addition to discussing their general ontology-learning framework and architecture, the authors give examples of the ontology-learning cycle that they have implemented in their ontology-learning environment, Text-To-Onto, such as ontology learning from free text, dictionaries, or legacy ontologies.},
	Address = {Piscataway, NJ, USA},
	Author = {Alexander Maedche and Steffen Staab},
	Date-Added = {2006-08-28 11:45:42 +0100},
	Date-Modified = {2006-08-28 11:46:56 +0100},
	Doi = {10.1109/5254.920602},
	Issn = {1541-1672},
	Journal = {IEEE Intelligent Systems},
	Keywords = {Ontology,Semantic Web},
	Number = {2},
	Pages = {72--79},
	Publisher = {IEEE Educational Activities Department},
	Title = {Ontology Learning for the {S}emantic {W}eb},
	Volume = {16},
	Year = {2001},
	Bdsk-Url-1 = {http://dx.doi.org/10.1109/5254.920602}}

@inproceedings{lawrence:agents99,
	Address = {New York, NY},
	Author = {Steve Lawrence and Kurt Bollacker and C. Lee Giles},
	Booktitle = {Proceedings of the 3rd International Conference on Autonomous Agents},
	Date-Added = {2006-08-22 12:58:15 +0100},
	Date-Modified = {2006-08-22 12:58:31 +0100},
	Keywords = {Record Linkage,Citation Matching},
	Pages = {392--393},
	Publisher = {ACM Press},
	Title = {Autonomous Citation Matching},
	Month = may,
	Year = {1999}}

@book{newcombe:book88,
	Author = {H. B. Newcombe},
	Date-Added = {2006-08-22 12:54:54 +0100},
	Date-Modified = {2006-08-22 12:55:24 +0100},
	Keywords = {Record Linkage},
	Publisher = {Oxford University Press},
	Title = {Handbook of record linkage: methods for health and statistical studies, administration, and business},
	Year = {1988}}

@inproceedings{baxter:kdd03-wkshp,
	Address = {Washington, DC},
	Author = {Rohan Baxter and Peter Christen and Tim Churches},
	Booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data Cleaning, Record Linkage, and Object Consolidation},
	Date-Added = {2006-08-22 12:48:44 +0100},
	Date-Modified = {2006-08-22 12:48:57 +0100},
	Keywords = {Record Linkage},
	Pages = {25--27},
	Title = {A Comparison of Fast Blocking Methods for Record Linkage},
	Year = {2003}}

@inproceedings{giles:dl98,
	Address = {Pittsburgh, PA},
	Author = {C. Lee Giles and Kurt Bollacker and Steve Lawrence},
	Booktitle = {Proceedings of the 3rd ACM Conference on Digital Libraries},
	Date-Added = {2006-08-22 12:45:33 +0100},
	Date-Modified = {2006-08-22 12:45:57 +0100},
	Keywords = {Record Linkage,Citation Matching},
	Pages = {89--98},
	Title = {{CiteSeer}: An Automatic Citation Indexing System},
	Year = {1998}}

@techreport{bilenko:tr02,
	Address = {Austin, TX},
	Author = {Mikhail Bilenko and Raymond J. Mooney},
	Date-Added = {2006-08-22 12:43:14 +0100},
	Date-Modified = {2006-08-22 12:43:37 +0100},
	Institution = {Artificial Intelligence Laboratory, University of Texas at Austin},
	Keywords = {Deduplication},
	Month = feb,
	Number = {AI 02-296},
	Title = {Learning to Combine Trained Distance Metrics for Duplicate Detection in Databases},
	Year = {2002}}

@article{Domingos2006ys,
	Abstract = {We propose a simple approach to combining first-order logic and probabilistic graphical models in a single representation. A Markov logic network (MLN) is a first-order knowledge base with a weight attached to each formula (or clause). Together with a set of constants representing objects in the domain, it specifies a ground Markov network containing one feature for each possible grounding of a first-order formula in the KB, with the corresponding weight. Inference in MLNs is performed by MCMC over the minimal subset of the ground network required for answering the query. Weights are efficiently learned from relational databases by iteratively optimizing a pseudo-likelihood measure. Optionally, additional clauses are learned using inductive logic programming techniques. Experiments with a real-world database and knowledge base in a university domain illustrate the promise of this approach.},
	Author = {Pedro Domingos and Matthew Richardson},
	Date-Added = {2006-08-20 23:05:33 +0100},
	Date-Modified = {2006-08-20 23:08:59 +0100},
	Journal = {Machine Learning},
	Keywords = {Logic Programming, Inductive Logic Programming},
	Pages = {107--136},
	Title = {{M}arkov {L}ogic {N}etworks},
	Volume = {62},
	Year = {2006}}

@inproceedings{bhattacharya:sdm06,
	Address = {Bethesda, MD},
	Author = {Indrajit Bhattacharya and Lise Getoor},
	Booktitle = {6th SIAM International Conference on Data Mining (SDM-2006)},
	Date-Added = {2006-08-20 23:01:33 +0100},
	Date-Modified = {2006-08-20 23:02:03 +0100},
	Keywords = {Entity Resolution},
	Title = {A Latent {D}irichlet Model for Unsupervised Entity Resolution},
	Year = {2006}}

@inproceedings{parag:mrdm04,
	Author = {Parag Singla and Pedro Domingos},
	Booktitle = {Proceedings of the 2004 ACM SIGKDD Workshop on Multi-Relational Data Mining},
	Date-Added = {2006-08-20 22:16:02 +0100},
	Date-Modified = {2008-06-08 15:50:43 +0100},
	Editor = {Saso Dzeroski and Hendrik Blockeel},
	Keywords = {Record Linkage},
	Location = {Seattle, WA},
	Month = aug,
	Pages = {31--48},
	Title = {Multi-Relational Record Linkage},
	Url = {http://www-ai.ijs.si/SasoDzeroski/MRDM2004/proceedings/parag.pdf},
	Year = {2004},
	Bdsk-Url-1 = {http://www-ai.ijs.si/SasoDzeroski/MRDM2004/proceedings/parag.pdf}}

@inproceedings{Bhattacharya2005dq,
	Address = {New York, NY, USA},
	Author = {Indrajit Bhattacharya and Lise Getoor},
	Booktitle = {MRDM '05: Proceedings of the 4th international workshop on Multi-relational mining},
	Date-Added = {2006-08-20 21:57:50 +0100},
	Date-Modified = {2006-09-10 20:03:22 +0100},
	Doi = {http://doi.acm.org/10.1145/1090193.1090195},
	Isbn = {1-59593-212-7},
	Keywords = {Entity Resolution},
	Location = {Chicago, Illinois},
	Pages = {3--12},
	Publisher = {ACM Press},
	Title = {Relational clustering for multi-type entity resolution},
	Year = {2005},
	Bdsk-Url-1 = {http://doi.acm.org/10.1145/1090193.1090195}}

@inproceedings{Pasula2003vn,
	Abstract = {Identity uncertainty is a pervasive problem in real-world data analysis. It arises whenever objects are not labeled with unique identifiers or when those identifiers may not be perceived perfectly. In such cases, two observations may or may not correspond to the same object. In this paper, we consider the problem in the context of citation matching -- the problem of deciding which citations correspond to the same publication. Our approach is based on the use of a relational probability model to define a generative model for the domain, including models of author and title corruption and a probabilistic citation grammar. Identity uncertainty is handled by extending standard models to incorporate probabilities over the possible mappings between terms in the language and objects in the domain. Inference is based on Markov chain Monte Carlo, augmented with specific methods for generating efficient proposals when the domain contains many objects. Results on several citation data sets show that the method outperforms current algorithms for citation matching. The declarative, relational nature of the model also means that our algorithm can determine object characteristics such as author names by combining multiple citations of multiple papers.},
	Address = {Cambridge, MA},
	Author = {Hanna Pasula and Bhaskara Marthi and Brian Milch and Stuart Russell and Ilya Shpitser},
	Booktitle = {Advances in Neural Information Processing Systems 15 (NIPS 2002)},
	Citeulike-Article-Id = {613155},
	Date-Added = {2006-08-20 21:45:09 +0100},
	Date-Modified = {2006-08-20 22:09:33 +0100},
	Editor = {Suzanna Becker and Sebastian Thrun and Klaus Obermayer},
	Keywords = {Identity Uncertainty, Citation Matching},
	Pages = {1401--1408},
	Publisher = {MIT Press},
	Rating = {3},
	Read = {Yes},
	Title = {Identity Uncertainty and Citation Matching},
	Url = {http://www.cs.washington.edu/homes/pasula/papers/nipsnewer.pdf},
	Volume = {15},
	Year = {2003},
	Bdsk-Url-1 = {http://www.cs.washington.edu/homes/pasula/papers/nipsnewer.pdf}}

@manual{Biron2004kx,
	Abstract = {XML Schema: Datatypes is part 2 of the specification of the XML Schema language. It defines facilities for defining datatypes to be used in XML Schemas as well as other XML specifications. The datatype language, which is itself represented in XML 1.0, provides a superset of the capabilities found in XML 1.0 document type definitions (DTDs) for specifying datatypes on elements and attributes.},
	Author = {Paul V. Biron and Ashok Malhotra},
	Date-Added = {2006-08-19 17:43:27 +0100},
	Date-Modified = {2006-08-28 23:20:21 +0100},
	Edition = {{W3C} {R}ecommendation 28 {O}ctober 2004},
	Keywords = {RDF,XML},
	Month = {October},
	Organization = {{W3C}},
	Title = {{XML} {S}chema Part 2: Datatypes Second Edition},
	Url = {http://www.w3.org/TR/xmlschema-2/},
	Year = {2004},
	Bdsk-Url-1 = {http://www.w3.org/TR/xmlschema-2/}}

@manual{Klyne2004uq,
	Abstract = {The Resource Description Framework (RDF) is a framework for representing information in the Web.

RDF Concepts and Abstract Syntax defines an abstract syntax on which RDF is based, and which serves to link its concrete syntax to its formal semantics. It also includes discussion of design goals, key concepts, datatyping, character normalization and handling of URI references.},
	Author = {Graham Klyne and Jeremy J. Carroll},
	Date-Added = {2006-08-19 17:39:57 +0100},
	Date-Modified = {2006-08-19 17:43:18 +0100},
	Edition = {W3C Recommendation 10 February 2004},
	Keywords = {RDF},
	Month = {10},
	Organization = {W3C},
	Title = {{R}esource {D}escription {F}ramework ({RDF}): Concepts and Abstract Syntax},
	Url = {http://www.w3.org/TR/rdf-concepts/},
	Year = {2004},
	Bdsk-Url-1 = {http://www.w3.org/TR/rdf-concepts/}}

@inproceedings{Hayes2004fk,
	Abstract = {RDF Graphs are sets of assertions in the form of subject-predicate-object triples of information resources. Although for simple examples they can be understood intuitively as directed labeled graphs, this representation does not scale well for more complex cases, particularly regarding the central notion of connectivity of resources. We argue in this paper that there is need for an intermediate representation of RDF to enable the application of well-established methods from graph theory. We introduce the concept of RDF Bipartite Graph and show its advantages as intermediate model between the abstract triple syntax and data structures used by applications. In the light of this model we explore the issues of transformation costs, data/schema-structure, and the notion of RDF connectivity.},
	Address = {Berlin / Heidelberg},
	Author = {Jonathan Hayes and Claudio Gutierrez},
	Booktitle = {The Semantic Web -- ISWC 2004, Third International Semantic Web Conference, Hiroshima, Japan, Proceedings.},
	Date-Added = {2006-08-19 17:15:42 +0100},
	Date-Modified = {2006-08-19 17:30:14 +0100},
	Editor = {McIlraith, Sheila A. and Plexousakis, Dimitris and Harmelen, Frank van},
	Keywords = {RDF},
	Month = {November},
	Pages = {47--61},
	Publisher = {Springer-Verlag},
	Series = {Lecture Notes in Computer Science},
	Title = {Bipartite Graphs as Intermediate Model for {RDF}},
	Volume = {3298},
	Year = {2004}}

@manual{Prudhommeaux2005vn,
	Abstract = {RDF is a flexible and extensible way to represent information about World Wide Web resources. It is used to represent, among other things, personal information, social networks, metadata about digital artifacts, like music and images, as well as provide a means of integration over disparate sources of information. A standardized query language for RDF data with multiple implementations offers developers and end users a way to write and to consume the results of queries across this wide range of information. Used with a common protocol, applications can access and combine information from across the web.

This document describes the query language part of Protocol And RDF Query Language (SPARQL) for easy access to RDF stores. It is designed to meet the requirements and design objectives described in the W3C RDF Data Access Working Group (DAWG) document "RDF Data Access Use Cases and Requirements".},
	Author = {Eric Prud'hommeaux and Andy Seaborne},
	Date-Added = {2006-08-09 16:15:08 +0100},
	Date-Modified = {2006-08-28 23:19:10 +0100},
	Edition = {{W3C} {W}orking {D}raft 19 {A}pril 2005},
	Keywords = {SPARQL, RDF},
	Month = {April},
	Organization = {{W3C}},
	Title = {{SPARQL} Query Language for {RDF}},
	Url = {http://www.w3.org/TR/rdf-sparql-query/},
	Year = {2005},
	Bdsk-Url-1 = {http://www.w3.org/TR/rdf-sparql-query/}}

@article{Garofalakis2003fk,
	Abstract = {XML is rapidly emerging as the new standard for data representation and exchange on the Web. Unlike HTML, tags in XML documents describe the semantics of the data and not how it is to be displayed. In addition, an XML document can be accompanied by a Document Type Descriptor (DTD) which plays the role of a schema for an XML data collection. DTDs contain valuable information on the structure of documents and thus have a crucial role in the efficient storage of XML data, as well as the effective formulation and optimization of XML queries. Despite their importance, however, DTDs are not mandatory, and it is frequently possible that documents in XML databases will not have accompanying DTDs. In this paper, we propose XTRACT, a novel system for inferring a DTD schema for a database of XML documents. Since the DTD syntax incorporates the full expressive power of regular expressions, naive approaches typically fail to produce concise and intuitive DTDs. Instead, the XTRACT inference algorithms employ a sequence of sophisticated steps that involve: (1) finding patterns in the input sequences and replacing them with regular expressions to generate ``general'' candidate DTDs, (2) factoring candidate DTDs using adaptations of algorithms from the logic optimization literature, and (3) applying the Minimum Description Length (MDL) principle to find the best DTD among the candidates. The results of our experiments with real-life and synthetic DTDs demonstrate the effectiveness of XTRACT's approach in inferring concise and semantically meaningful DTD schemas for XML databases.},
	Address = {Hingham, MA, USA},
	Author = {Garofalakis, Minos and Gionis, Aristides and Rastogi, Rajeev and Seshadri, S. and Shim, Kyuseok},
	Citeulike-Article-Id = {332555},
	Date-Added = {2006-08-08 22:21:16 +0100},
	Date-Modified = {2006-08-08 22:50:11 +0100},
	Doi = {10.1023/A:1021560618289},
	Issn = {1384-5810},
	Journal = {Data Mining and Knowledge Discovery},
	Keywords = {XML, Ontology},
	Local-Url = {learningDTDfromXML.pdf},
	Month = {January},
	Number = {1},
	Pages = {23--56},
	Publisher = {Kluwer Academic Publishers},
	Rating = {3},
	Read = {Yes},
	Title = {{XTRACT}: Learning {D}ocument {T}ype {D}escriptors from {XML} Document Collections},
	Url = {http://dx.doi.org/10.1023/A:1021560618289},
	Volume = {7},
	Year = {2003},
	Bdsk-Url-1 = {http://dx.doi.org/10.1023/A:1021560618289}}

@article{Jaro1989ze,
	Abstract = {A test census of Tampa, Florida and an independent postenumeration survey (PES) were conducted by the U.S. Census Bureau in 1985. The PES was a stratified block sample with heavy emphasis placed on hard-to-count population groups. Matching the individuals in the census to the individuals in the PES is an important aspect of census coverage evaluation and consequently a very important process for any census adjustment operations that might be planned. For such an adjustment to be feasible, record-linkage software had to be developed that could perform matches with a high degree of accuracy and that was based on an underlying mathematical theory. A principal purpose of the PES was to provide an opportunity to evaluate the newly implemented record-linkage system and associated methodology. This article discusses the theoretical and practical issues encountered in conducting the matching operation and presents the results of that operation. A review of the theoretical background of the record-linkage problem provides a framework for discussions of the decision procedure, file blocking, and the independence assumption. The estimation of the parameters required by the decision procedure is an important aspect of the methodology, and the techniques presented provide a practical system that is easily implemented. The matching algorithm (discussed in detail) uses the linear sum assignment model to "pair" the records. The Tampa, Florida, matching methodology is described in the final sections of the article. Included in the discussion are the results of the matching itself, an independent clerical review of the matches and nonmatches, conclusions, problem areas, and future work required.},
	Author = {Matthew A. Jaro},
	Date-Added = {2006-05-24 11:04:54 +0100},
	Date-Modified = {2006-05-24 11:16:22 +0100},
	Journal = {Journal of the American Statistical Association},
	Keywords = {Record Linkage},
	Month = {June},
	Number = {406},
	Pages = {414--420},
	Title = {Advances in Record-Linkage Methodology as Applied to Matching the 1985 Census of {T}ampa, {F}lorida},
	Url = {http://links.jstor.org/sici?sici=0162-1459%28198906%2984%3A406%3C414%3AAIRMAA%3E2.0.CO%3B2-Q},
	Volume = {84},
	Year = {1989},
	Bdsk-Url-1 = {http://links.jstor.org/sici?sici=0162-1459(198906)84:406%3C414:AIRMAA%3E2.0.CO;2-Q}}

@inproceedings{mugg:svilp,
	Abstract = {In this paper we explore a topic which is at the intersection of two areas of Machine Learning: namely Support Vector Machines (SVMs) and Inductive Logic Programming (ILP). We propose a general method for constructing kernels for Support Vector Inductive Logic Programming (SVILP). The kernel not only captures the semantic and syntactic relational information contained in the data but also provides the flexibility of using arbitrary forms of structured and non-structured data coded in a relational way. While specialised kernels have been developed for strings, trees and graphs our approach uses declarative background knowledge to provide the learning bias. The use of explicitly encoded background knowledge distinguishes SVILP from existing relational kernels which in ILP-terms work purely at the atomic generalisation level. The SVILP approach is a form of generalisation relative to background knowledge, though the final combining function for the ILP-learned clauses is an SVM rather than a logical conjunction. We evaluate SVILP empirically against related approaches, including an industry-standard toxin predictor called TOPKAT. Evaluation is conducted on a new broad-ranging toxicity dataset (DSSTox). The experimental results demonstrate that our approach significantly outperforms all other approaches in the study.},
	Author = {S.H. Muggleton and H. Lodhi and A. Amini and M.J.E. Sternberg},
	Booktitle = {Proceedings of the 8th International Conference on Discovery Science},
	Date-Added = {2006-04-12 16:51:31 +0100},
	Date-Modified = {2006-04-12 16:55:02 +0100},
	Keywords = {Kernels, Inductive Logic Programming},
	Pages = {163--175},
	Publisher = {Springer-Verlag},
	Series = {LNAI},
	Title = {Support {V}ector {I}nductive {L}ogic {P}rogramming},
	Url = {http://www.doc.ic.ac.uk/~shm/Papers/svilp.pdf},
	Volume = {3735},
	Year = {2005},
	Bdsk-Url-1 = {http://www.doc.ic.ac.uk/~shm/Papers/svilp.pdf}}

@article{Gaertner2004ax,
	Abstract = {This paper brings together two strands of machine learning of increasing importance: kernel methods and highly structured data. We propose a general method for constructing a kernel following the syntactic structure of the data, as defined by its type signature in a higher-order logic. Our main theoretical result is the positive definiteness of any kernel thus defined. We report encouraging experimental results on a range of real-world data sets. By converting our kernel to a distance pseudo-metric for 1-nearest neighbour, we were able to improve the best accuracy from the literature on the Diterpene data set by more than 10%.},
	Address = {Hingham, MA, USA},
	Author = {Thomas Gaertner and John W. Lloyd and Peter A. Flach},
	Date-Added = {2006-04-08 22:46:58 +0100},
	Date-Modified = {2006-04-08 22:55:18 +0100},
	Doi = {http://dx.doi.org/10.1023/B:MACH.0000039777.23772.30},
	Issn = {0885-6125},
	Journal = {Machine Learning},
	Keywords = {Structured Data, Kernels, Inductive Logic Programming, Higher Order Logic},
	Local-Url = {http://www.springerlink.com/openurl.asp?genre=article&id=doi:10.1023/B:MACH.0000039777.23772.30},
	Month = {December},
	Number = {3},
	Pages = {205--232},
	Publisher = {Kluwer Academic Publishers},
	Read = {Yes},
	Title = {Kernels and Distances for Structured Data},
	Url = {http://www.cs.bris.ac.uk/Publications/Papers/2000267.pdf},
	Volume = {57},
	Year = {2004},
	Bdsk-Url-1 = {http://www.cs.bris.ac.uk/Publications/Papers/2000267.pdf},
	Bdsk-Url-2 = {http://dx.doi.org/10.1023/B:MACH.0000039777.23772.30}}

@article{citeulike:354359,
	Abstract = {The popularity of the World-Wide Web (WWW) has made it a prime vehicle for disseminating information. The relevance of database concepts to the problems of managing and querying this information has led to a significant body of recent research addressing these problems. Even though the underlying challenge is the one that has been traditionally addressed by the database community - how to manage large volumes of data - the novel context of the WWW forces us to significantly extend previous techniques. The primary goal of this survey is to classify the different tasks to which database concepts have been applied, and to emphasize the technical innovations that were required to do so.},
	Author = {Florescu, Daniela and Levy, Alon Y. and Mendelzon, Alberto O.},
	Citeulike-Article-Id = {354359},
	Date-Added = {2006-04-08 22:37:05 +0100},
	Date-Modified = {2006-04-08 22:37:05 +0100},
	Journal = {SIGMOD Record},
	Keywords = {Information Retrieval, Relational Database, Semantic Integration},
	Number = {3},
	Pages = {59--74},
	Title = {Database Techniques for the {W}orld-{W}ide {W}eb: A Survey},
	Url = {http://citeseer.ist.psu.edu/florescu98database.html},
	Volume = {27},
	Year = {1998},
	Bdsk-Url-1 = {http://citeseer.ist.psu.edu/florescu98database.html}}

@inproceedings{fang94identification,
	Abstract = {This paper describes several aspects of the Remote--Exchange project at USC, which focuses on the controlled sharing and exchange of information among autonomous, heterogeneous database systems. The spectrum of heterogeneity which may exist among the components in a federation of database systems is examined, and an approach to accommodating such heterogeneity is described. An overview of the Remote--Exchange experimental system is provided.},
	Annote = {Also published as D. Fang, J. Hammer, and D. McLeod. The Identification and Resolution of Semantic Heterogeneity in Multidatabase Systems. In Multidatabase Systems: An Advanced Solution for Global Information Sharing, pages 52--60, 1994.},
	Author = {D. Fang and J. Hammer and D. McLeod},
	Booktitle = {Proceedings of the First International Workshop on Interoperability in Multidatabase Systems},
	Date-Added = {2006-04-08 22:37:05 +0100},
	Date-Modified = {2006-04-08 22:37:05 +0100},
	Keywords = {Relational Database, Semantic Integration},
	Month = {April},
	Pages = {136--143},
	Publisher = {IEEE Computer Society Press},
	Title = {The identification and resolution of semantic heterogeneity in multidatabase systems},
	Url = {http://citeseer.ist.psu.edu/fang94identification.html},
	Year = {1991},
	Bdsk-Url-1 = {http://citeseer.ist.psu.edu/fang94identification.html}}

@article{Codd1970sv,
	Abstract = {Future users of large data banks must be protected from having to know how the data is organized in the machine (the internal representation). A prompting service which supplies such information is not a satisfactory solution. Activities of users at terminals and most application programs should remain unaffected when the internal representation of data is changed and even when some aspects of the external representation are changed. Changes in data representation will often be needed as a result of changes in query, update, and report traffic and natural growth in the types of stored information. Existing noninferential, formatted data systems provide users with tree-structured files or slightly more general network models of the data. In Section 1, inadequacies of these models are discussed. A model based on n-ary relations, a normal form for data base relations, and the concept of a universal data sublanguage are introduced. In Section 2, certain operations on relations (other than logical inference) are discussed and applied to the problems of redundancy and consistency in the user's model.},
	Author = {E. F. Codd},
	Date-Added = {2006-04-08 22:37:05 +0100},
	Date-Modified = {2006-04-17 11:48:14 +0100},
	Journal = {Communications of the ACM},
	Keywords = {Relational Database},
	Month = {June},
	Number = {6},
	Pages = {377--387},
	Title = {A relational model of data for large shared data banks},
	Url = {http://portal.acm.org/ft_gateway.cfm?id=362685&type=pdf},
	Volume = {13},
	Year = {1970},
	Bdsk-Url-1 = {http://portal.acm.org/ft_gateway.cfm?id=362685&type=pdf}}

@article{Codd1979pn,
	Abstract = {During the last three or four years several investigators have been exploring ``semantic models'' for formatted databases. The intent is to capture (in a more or less formal way) more of the meaning of the data so that database design can become more systematic and the database system itself can behave more intelligently. Two major thrusts are clear: (1) the search for meaningful units that are as small as possible--atomic semantics; (2) the search for meaningful units that are larger than the usual n-ary relation--molecular semantics. In this paper we propose extensions to the relational model to support certain atomic and molecular semantics. These extensions represent a synthesis of many ideas from the published work in semantic modeling plus the introduction of new rules for insertion, update, and deletion, as well as new algebraic operators.},
	Author = {E. F. Codd},
	Date-Added = {2006-04-08 22:37:05 +0100},
	Date-Modified = {2006-04-08 22:37:05 +0100},
	Journal = {ACM Transactions on Database Systems},
	Keywords = {Semantic Integration, Relational Database},
	Local-Url = {http://www.scism.sbu.ac.uk/~rmkemp/codd1979.pdf},
	Month = {December},
	Number = {4},
	Pages = {397--434},
	Rating = {4},
	Read = {Yes},
	Title = {Extending the Database Relational Model to Capture More Meaning},
	Url = {http://www.informatik.uni-trier.de/~ley/db/journals/tods/Codd79.html},
	Volume = {4},
	Year = {1979},
	Bdsk-Url-1 = {http://www.informatik.uni-trier.de/~ley/db/journals/tods/Codd79.html}}

@book{citeulike:114719,
	Abstract = {{This is the first comprehensive introduction to Support Vector Machines (SVMs), a new generation learning system based on recent advances in statistical learning theory. Students will find the book both stimulating and accessible, while practitioners will be guided smoothly through the material required for a good grasp of the theory and its applications. The concepts are introduced gradually in accessible and self-contained stages, while the presentation is rigorous and thorough. Pointers to relevant literature and web sites containing software make it an ideal starting point for further study.}},
	Author = {Cristianini, Nello and Shawe-Taylor, John},
	Citeulike-Article-Id = {114719},
	Date-Added = {2006-04-08 22:35:26 +0100},
	Date-Modified = {2006-04-08 22:35:26 +0100},
	Howpublished = {Hardcover},
	Isbn = {0521780195},
	Keywords = {Kernels, Support Vector Machines},
	Month = {March},
	Publisher = {Cambridge University Press},
	Title = {An Introduction to Support Vector Machines and Other Kernel-based Learning Methods},
	Url = {http://www.amazon.com/exec/obidos/redirect?tag=citeulike-20\&path=ASIN/0521780195},
	Year = {2000},
	Bdsk-Url-1 = {http://www.amazon.com/exec/obidos/redirect?tag=citeulike-20%5C&path=ASIN/0521780195}}

@book{Shawe-Taylor2004ve,
	Author = {Shawe-Taylor, John and Cristianini, Nello},
	Citeulike-Article-Id = {176994},
	Date-Added = {2006-04-08 22:35:26 +0100},
	Date-Modified = {2006-09-02 22:01:10 +0100},
	Howpublished = {Hardcover},
	Isbn = {0521813972},
	Keywords = {Kernels, Support Vector Machines},
	Month = {June},
	Priority = {0},
	Publisher = {Cambridge University Press},
	Title = {Kernel Methods for Pattern Analysis},
	Url = {http://www.amazon.fr/exec/obidos/ASIN/0521813972/citeulike04-21},
	Year = {2004},
	Bdsk-Url-1 = {http://www.amazon.fr/exec/obidos/ASIN/0521813972/citeulike04-21}}

@article{citeulike:397291,
	Abstract = {At the heart of today's information-explosion problems are issues involving semantics, mutual understanding, concept matching, and interoperability. Ontologies and the Semantic Web are offered as a potential solution, but creating ontologies for real-world knowledge is nontrivial. If we could automate the process, we could significantly improve our chances of making the Semantic Web a reality. While understanding natural language is difficult, tables and other structured information make it easier to interpret new items and relations. In this paper we introduce an approach to generating ontologies based on table analysis. We thus call our approach TANGO (Table ANalysis for Generating Ontologies). Based on conceptual modeling extraction techniques, TANGO attempts to (i) understand a table's structure and conceptual content; (ii) discover the constraints that hold between concepts extracted from the table; (iii) match the recognized concepts with ones from a more general specification of related concepts; and (iv) merge the resulting structure with other similar knowledge representations. TANGO is thus a formalized method of processing the format and content of tables that can serve to incrementally build a relevant reusable conceptual ontology.},
	Author = {Tijerino, Yuri and Embley, David and Lonsdale, Deryle and Ding, Yihong and Nagy, George},
	Citeulike-Article-Id = {397291},
	Date-Added = {2006-04-08 22:33:56 +0100},
	Date-Modified = {2006-04-08 22:33:56 +0100},
	Doi = {10.1007/s11280-005-0360-8},
	Issn = {1386-145X},
	Journal = {World Wide Web},
	Keywords = {Ontology, Semantic Web, Information Extraction},
	Month = {September},
	Number = {3},
	Pages = {261--285},
	Publisher = {Kluwer Academic Publishers},
	Title = {Towards Ontology Generation from Tables},
	Url = {http://www.ingentaconnect.com/content/klu/wwwj/2005/00000008/00000003/00000360},
	Volume = {8},
	Year = {2005},
	Bdsk-Url-1 = {http://www.ingentaconnect.com/content/klu/wwwj/2005/00000008/00000003/00000360},
	Bdsk-Url-2 = {http://dx.doi.org/10.1007/s11280-005-0360-8}}

@article{Getoor2006sg,
	Abstract = {In this paper, we describe the syntax and semantics for a probabilistic relational language (PRL). PRL is a recasting of recent work in Probabilistic Relational Models (PRMs) into a logic programming framework. We show how to represent varying degrees of complexity in the semantics including attribute uncertainty, structural uncertainty and identity uncertainty. Our approach is similar in spirit to the work in Bayesian Logic Programs (BLPs), and Logical Bayesian Networks (LBNs). However, surprisingly, there are still some important differences in the resulting formalism; for example, we introduce a general notion of aggregates based on the PRM approaches. One of our contributions is that we show how to support richer forms of structural uncertainty in a probabilistic logical language than have been previously described. Our goal in this work is to present a unifying framework that supports all of the types of relational uncertainty yet is based on logic programming formalisms. We also believe that it facilitates understanding the relationship between the frame-based approaches and alternate logic programming approaches, and allows greater transfer of ideas between them.},
	Author = {Lise Getoor and John Grant},
	Date-Added = {2006-03-16 22:46:48 +0000},
	Date-Modified = {2006-03-16 22:51:03 +0000},
	Journal = {Machine Learning},
	Keywords = {Logic Programming, Probabilistic Relational Models, Uncertainty},
	Month = {February},
	Number = {1-2},
	Pages = {7--31},
	Rating = {4},
	Title = {{PRL}: A probabilistic relational language},
	Url = {http://www.springerlink.com/openurl.asp?genre=article&eissn=1573-0565&volume=62&issue=1&spage=7},
	Volume = {62},
	Year = {2006},
	Bdsk-Url-1 = {http://www.springerlink.com/openurl.asp?genre=article&eissn=1573-0565&volume=62&issue=1&spage=7}}

@article{Zaki2006ih,
	Abstract = {XML documents have recently become ubiquitous because of their varied applicability in a number of applications. Classification is an important problem in the data mining domain, but current classification methods for XML documents use IR-based methods in which each document is treated as a bag of words. Such techniques ignore a significant amount of information hidden inside the documents. In this paper we discuss the problem of rule based classification of XML data by using frequent discriminatory substructures within XML documents. Such a technique is more capable of finding the classification characteristics of documents. In addition, the technique can also be extended to cost sensitive classification. We show the effectiveness of the method with respect to other classifiers. We note that the methodology discussed in this paper is applicable to any kind of semi-structured data.},
	Author = {Mohammed J. Zaki and Charu C. Aggarwal},
	Date-Added = {2006-03-16 20:42:43 +0000},
	Date-Modified = {2006-03-16 20:48:49 +0000},
	Journal = {Machine Learning},
	Keywords = {Structured Data,Classification,Rule induction,Tree mining},
	Month = {February},
	Number = {1-2},
	Pages = {137--170},
	Rating = {5},
	Title = {{XRules}: An effective algorithm for structural classification of {XML} data},
	Url = {http://www.springerlink.com/openurl.asp?genre=article&eissn=1573-0565&volume=62&issue=1&spage=137},
	Volume = {62},
	Year = {2006},
	Bdsk-Url-1 = {http://www.springerlink.com/openurl.asp?genre=article&eissn=1573-0565&volume=62&issue=1&spage=137}}

@article{newcombe:science59,
	Author = {H.B. Newcombe and J.M. Kennedy and S.J. Axford and A.P. James},
	Date-Added = {2006-03-12 17:36:17 +0000},
	Date-Modified = {2006-03-12 17:36:17 +0000},
	Journal = {Science},
	Keywords = {Record Linkage},
	Pages = {954--959},
	Title = {Automatic Linkage of Vital Records},
	Volume = 130,
	Year = 1959}

@article{winkler88using,
	Abstract = {Let AxB be the product space of two sets A and B which is divided into matches (pairs representing the same entity) and nonmatches (pairs representing different entities). Linkage rules are those that divide AxB into links (designated matches), possible links (pairs for which we delay a decision), and nonlinks (designated nonmatches). Under fixed bounds on the error rates, Fellegi and Sunter (1969) provided a linkage rule that is optimal in the sense that it minimizes the set of possible links. The optimality is dependent on knowledge of certain joint inclusion probabilities that are used in a crucial likelihood ratio. In applying the record linkage model, assumptions are often made that allow estimation of weights that are a function of the joint inclusion probabilities. If the assumptions are not met, then the linkage procedure using estimates computed under the assumptions may not be optimal. This paper describes a method for estimating weights using the EM Algorithm under less restrictive assumptions. The weight computation automatically incorporates a Bayesian adjustment based on file characteristics.},
	Author = {William E. Winkler},
	Date-Added = {2006-03-12 17:05:27 +0000},
	Date-Modified = {2006-03-12 17:40:55 +0000},
	Journal = {American Statistical Association, Proceedings of the Section on Survey Research Methods},
	Keywords = {Record Linkage},
	Pages = {667--671},
	Rating = {3},
	Read = {Yes},
	Text = {Winkler, W. E., (1988), Using the EM Algorithm for Weight Computation in the Fellegi-Sunter Model of Record Linkage, American Statistical Association, Proceedings of the Section on Survey Research Methods, 667-671.},
	Title = {Using the {EM} Algorithm for Weight Computation in the {F}ellegi-{S}unter Model of Record Linkage},
	Url = {http://citeseer.ist.psu.edu/article/winkler00using.html},
	Year = {1988},
	Bdsk-Url-1 = {http://citeseer.ist.psu.edu/article/winkler00using.html}}

@inproceedings{winkler03data,
	Abstract = {Data Cleaning methods are used for finding duplicates within a file or across sets of files. This overview provides background on the Fellegi-Sunter model of record linkage. The Fellegi-Sunter model provides an optimal theoretical classification rule. Fellegi and Sunter introduced methods for automatically estimating optimal parameters without training data that we extend to many real world situations.},
	Author = {William E. Winkler},
	Booktitle = {Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining (KDD 2003), Washington, DC, August 24--27, 2003},
	Date-Added = {2006-03-12 15:36:05 +0000},
	Date-Modified = {2006-03-12 17:36:30 +0000},
	Keywords = {Record Linkage},
	Month = {August},
	Publisher = {ACM Press},
	Rating = {4},
	Read = {Yes},
	Title = {Data Cleaning Methods},
	Url = {http://citeseer.ist.psu.edu/winkler03data.html},
	Year = {2003},
	Bdsk-Url-1 = {http://citeseer.ist.psu.edu/winkler03data.html}}

@techreport{winkler99state,
	Abstract = {This paper provides an overview of methods and systems developed for record linkage. Modern record linkage begins with the pioneering work of Newcombe and is especially based on the formal mathematical model of Fellegi and Sunter. In their seminal work, Fellegi and Sunter introduced many powerful ideas for estimating record linkage parameters and other ideas that still influence record linkage today. Record linkage research is characterized by its synergism of statistics, computer science, and operations research. Many difficult algorithms have been developed and put in software systems. Record linkage practice is still very limited. Some limits are due to existing software. Other limits are due to the difficulty in automatically estimating matching parameters and error rates, with current research highlighted by the work of Larsen and Rubin.},
	Address = {Statistical Research Division, Room 3000-4, Bureau of the Census, Washington, DC, 20233-9100 USA},
	Author = {William E. Winkler},
	Date-Added = {2006-03-12 15:02:20 +0000},
	Date-Modified = {2006-03-12 17:37:59 +0000},
	Institution = {U. S. Bureau of the Census},
	Keywords = {Record Linkage},
	Rating = {4},
	Read = {Yes},
	Text = {W. E. Winkler. The state of record linkage and current research problems. Technical report, Statistical Research Division, U.S. Bureau of the Census, Washington, DC, 1999.},
	Title = {The state of record linkage and current research problems},
	Url = {http://citeseer.ist.psu.edu/article/winkler99state.html},
	Year = {1999},
	Bdsk-Url-1 = {http://citeseer.ist.psu.edu/article/winkler99state.html}}

@article{Fellegi1969bx,
	Abstract = {A mathematical model is developed to provide a theoretical framework for a computer-oriented solution to the problem of recognizing those records in two files which represent identical persons, objects or events (said to be matched). A comparison is to be made between the recorded characteristics and values in two records (one from each file) and a decision made as to whether or not the members of the comparison-pair represent the same person or event, or whether there is insufficient evidence to justify either of these decisions at stipulated levels of error. These three decisions are referred to as link $(A_1)$, a non-link $(A_3)$, and a possible link $(A_2)$. The first two decisions are called positive dispositions. The two types of error are defined as the error of the decision $A_1$ when the members of the comparison pair are in fact unmatched, and the error of the decision $A_3$ when the members of the comparison pair are, in fact matched. The probabilities of these errors are defined as $$\mu = \sum_{\gamma\epsilon\Gamma} u(\gamma)P(A_1\mid\gamma)$$ and $$\lambda = \sum_{\gamma\epsilon\Gamma} m(\gamma)P(A_3\mid\gamma)$$ respectively where $u(\gamma), m(\gamma)$ are the probabilities of realizing $\gamma$ (a comparison vector whose components are the coded agreements and disagreements on each characteristic) for unmatched and matched record pairs respectively. The summation is over the whole comparison space $\Gamma$ of possible realizations. A linkage rule assigns probabilities $P(A_1\mid\gamma)$, and $P(A_2\mid\gamma)$, and $P(A_3\mid\gamma)$ to each possible realization of $\gamma \epsilon \Gamma$. An optimal linkage rule $L(\mu, \lambda, \Gamma)$ is defined for each value of $(\mu, \lambda)$ as the rule that minimizes $P(A_2)$ at those error levels. In other words, for fixed levels of error, the rule minimizes the probability of failing to make positive dispositions. A theorem describing the construction and properties of the optimal linkage rule and two corollaries to the theorem which make it a practical working tool are given.},
	Author = {Ivan P. Fellegi and Alan B. Sunter},
	Date-Added = {2006-03-11 17:48:11 +0000},
	Date-Modified = {2006-03-12 17:40:42 +0000},
	Journal = {Journal of the American Statistical Association},
	Keywords = {Record Linkage},
	Month = {December},
	Number = {328},
	Pages = {1183--1210},
	Rating = {3},
	Read = {Yes},
	Title = {A Theory For Record Linkage},
	Url = {http://links.jstor.org/sici?sici=0162-1459%28196912%2964%3A328%3C1183%3AATFRL%3E2.0.CO%3B2-T},
	Volume = {64},
	Year = {1969},
	Bdsk-Url-1 = {http://links.jstor.org/sici?sici=0162-1459(196912)64:328%3C1183:ATFRL%3E2.0.CO;2-T}}

@techreport{Malin2005tv,
	Abstract = {Entity resolution, the process of determining if two or more references correspond to the same entity, is an emerging area of study in computer science. While entity resolution models leverage artificial intelligence, machine learning, and data mining techniques, relationships between various models remain ill-specified. Despite growth in both research and literature, investigations are scattered across communities with minimal communication. This paper introduces a conceptual framework, called ENRES, for explicit and formal entity resolution model definition. Through ENRES, we illustrate how several models solve related, though distinctly different, variants of entity resolution. In addition, we prove the existence of entity resolution challenges yet to be addressed by past or current research.},
	Address = {Institute for Software Research International, School of Computer Science, Carnegie Mellon University, Pittsburgh, PA USA 15213-3890},
	Author = {Bradley Malin and Latanya Sweeney},
	Date-Added = {2006-03-11 16:57:45 +0000},
	Date-Modified = {2006-08-28 22:49:16 +0100},
	Institution = {Carnegie Mellon University},
	Keywords = {Entity Resolution,Record Linkage},
	Number = {CMU-ISRI-05-134},
	Rating = {1},
	Read = {Yes},
	Title = {{ENRES}: A Semantic Framework for Entity Resolution Modelling},
	Url = {http://reports-archive.adm.cs.cmu.edu/anon/isri2005/CMU-ISRI-05-134.pdf},
	Year = {2005},
	Bdsk-Url-1 = {http://reports-archive.adm.cs.cmu.edu/anon/isri2005/CMU-ISRI-05-134.pdf}}

@article{Li2005vv,
	Abstract = {Intelligent access to information requires semantic integration of structured databases with unstructured textual resources. While the semantic integration problem has been widely studied in the database domain on structured data, it has not been fully recognized nor studied on unstructured or semi-structured textual resources. This paper presents a first step towards this goal by studying semantic integration in natural language texts --- identifying whether different mentions of real world entities, within and across documents, actually represent the same concept. We present a machine learning study of this problem. The first approach is a discriminative approach --- a pairwise local classifier is trained in a supervised way to determine whether two given mentions represent the same real world entity. This is followed, potentially, by a global clustering algorithm that uses the classifier as its similarity metric. Our second approach is a global generative model, at the heart of which is a view on how documents are generated and how names (of different entity types) are ``sprinkled'' into them. In its most general form, our model assumes: (1) a joint distribution over entities (e.g., a document that mentions ``President Kennedy'' is more likely to mention ``Oswald'' or ``White House'' than ``Roger Clemens''), (2) an ``author'' model, that assumes that at least one mention of an entity in a document is easily identifiable, and then generates other mentions via (3) an appearance model, governing how mentions are transformed from the ``representative'' mention. We show that both approaches perform very accurately, in the range of 90%--95% F1 measure for different entity types, much better than previous approaches to (some aspects of) this problem.},
	Author = {X. Li and P. Morie and D. Roth},
	Date-Added = {2006-03-11 15:08:26 +0000},
	Date-Modified = {2006-03-11 15:14:21 +0000},
	Journal = {AI Magazine: Special Issue on Semantic Integration},
	Keywords = {Semantic Integration},
	Number = {1},
	Rating = {4},
	Read = {Yes},
	Title = {Semantic integration in text: From ambiguous names to identifiable entities},
	Volume = {26},
	Year = {2005}}

@article{cook:holder94subdue,
	Author = {Diane J. Cook and Lawrence B. Holder},
	Date-Added = {2006-01-31 09:31:31 +0000},
	Date-Modified = {2006-03-11 15:32:06 +0000},
	Journal = {Journal of Artificial Intelligence Research},
	Keywords = {Structured Data,Knowledge Discovery},
	Pages = {231--255},
	Title = {Substructure Discovery Using Minimum Description Length and Background Knowledge},
	Url = {http://citeseer.ist.psu.edu/article/cook94substructure.html},
	Volume = {1},
	Year = {1994},
	Bdsk-Url-1 = {http://citeseer.ist.psu.edu/article/cook94substructure.html}}

@misc{macskassy03simple,
	Author = {S. Macskassy and F. Provost},
	Date-Added = {2006-01-31 09:31:31 +0000},
	Date-Modified = {2006-03-11 16:11:32 +0000},
	Keywords = {Networks},
	Text = {S. A. Macskassy and F. Provost, A Simple Relational Classifier, Workshop on Multi-Relational Data Mining in conjunction with KDD-2003 (MRDM-2003), Washington, DC, 2003},
	Title = {A Simple Relational Classifier},
	Url = {http://citeseer.ist.psu.edu/macskassy03simple.html},
	Year = {2003},
	Bdsk-Url-1 = {http://citeseer.ist.psu.edu/macskassy03simple.html}}

@inproceedings{jensen:al2004,
	Address = {New York, NY, USA},
	Author = {David Jensen and Jennifer Neville and Brian Gallagher},
	Booktitle = {KDD '04: Proceedings of the 2004 ACM SIGKDD international conference on Knowledge discovery and data mining},
	Date-Added = {2006-01-31 09:31:31 +0000},
	Date-Modified = {2006-03-11 15:20:12 +0000},
	Doi = {http://doi.acm.org/10.1145/1014052.1014125},
	Isbn = {1-58113-888-9},
	Keywords = {Networks},
	Location = {Seattle, WA, USA},
	Pages = {593--598},
	Publisher = {ACM Press},
	Title = {Why collective inference improves relational classification},
	Year = {2004},
	Bdsk-Url-1 = {http://doi.acm.org/10.1145/1014052.1014125}}

@inproceedings{Ravikumar:Cohen2004,
	Abstract = {The task of matching co-referent records is known among other names as record linkage. For large record linkage problems, often there is little or no labeled data available, but unlabeled data shows a reasonably clear structure. For such problems, unsupervised or semi-supervised methods are preferable to supervised methods. In this paper, we describe a hierarchical graphical model framework for the record-linkage problem in an unsupervised setting. In addition to proposing new methods, we also cast existing unsupervised probabilistic record linkage methods in this framework. Some of the techniques we propose to minimize overfitting in the above model are of interest in the general graphical model setting. We describe a method for incorporating monotonicity constraints in a graphical model. We also outline a bootstrapping approach of using ``single field'' classifiers to noisily label latent variables in a hierarchical model. Experimental results show that our proposed unsupervised methods perform quite competitively even with fully supervised record linkage methods.},
	Address = {Arlington, Virginia, United States},
	Author = {Pradeep Ravikumar and William W. Cohen},
	Booktitle = {AUAI '04: Proceedings of the 20th conference on Uncertainty in artificial intelligence},
	Date-Added = {2006-01-31 09:31:31 +0000},
	Date-Modified = {2006-08-20 21:36:05 +0100},
	Isbn = {0-9749039-0-6},
	Keywords = {Semantic Integration,Record Linkage},
	Location = {Banff, Canada},
	Pages = {454--461},
	Publisher = {AUAI Press},
	Rating = {3},
	Read = {Yes},
	Title = {A hierarchical graphical model for record linkage},
	Year = {2004}}

@inproceedings{rosen-zvi:al2004,
	Address = {Arlington, Virginia, United States},
	Author = {Michal Rosen-Zvi and Thomas Griffiths and Mark Steyvers and Padhraic Smyth},
	Booktitle = {AUAI '04: Proceedings of the 20th conference on Uncertainty in artificial intelligence},
	Date-Added = {2006-01-31 09:31:31 +0000},
	Date-Modified = {2006-03-11 15:26:48 +0000},
	Isbn = {0-9749039-0-6},
	Keywords = {Social Networks},
	Location = {Banff, Canada},
	Pages = {487--494},
	Publisher = {AUAI Press},
	Title = {The author-topic model for authors and documents},
	Year = {2004}}

@inproceedings{wellner:al2004,
	Address = {Arlington, Virginia, United States},
	Author = {Wellner, Ben and McCallum, Andrew and Peng, Fuchun and Hay, Michael},
	Booktitle = {AUAI '04: Proceedings of the 20th conference on Uncertainty in artificial intelligence},
	Citeulike-Article-Id = {149282},
	Date-Added = {2006-01-31 09:31:31 +0000},
	Date-Modified = {2006-01-31 09:31:31 +0000},
	Isbn = {0-9749039-0-6},
	Keywords = {Information Extraction,Citation Matching},
	Pages = {593--601},
	Publisher = {AUAI Press},
	Title = {An integrated, conditional model of information extraction and coreference with application to citation matching},
	Url = {http://portal.acm.org/citation.cfm?id=1036915},
	Year = {2004},
	Bdsk-Url-1 = {http://portal.acm.org/citation.cfm?id=1036915}}

@article{newman2001,
	Abstract = {We investigate the structure of scientific collaboration networks. We consider two scientists to be connected if they have authored a paper together, and construct explicit networks of such connections using data drawn from a number of databases, including MEDLINE (biomedical research), the Los Alamos e-Print Archive (physics), and NCSTRL (computer science). We show that these collaboration networks form "small worlds" in which randomly chosen pairs of scientists are typically separated by only a short path of intermediate acquaintances. We further give results for mean and distribution of numbers of collaborators of authors, demonstrate the presence of clustering in the networks, and highlight a number of apparent differences in the patterns of collaboration between the fields studied.},
	Author = {M. E. J. Newman},
	Date-Added = {2006-01-31 09:31:31 +0000},
	Date-Modified = {2006-03-11 15:24:16 +0000},
	Journal = {Proceedings of the National Academy of Sciences USA},
	Keywords = {Social Networks},
	Pages = {404--409},
	Title = {The structure of scientific collaboration networks},
	Url = {http://aps.arxiv.org/abs/cond-mat/0007214/},
	Volume = {98},
	Year = {2001},
	Bdsk-Url-1 = {http://aps.arxiv.org/abs/cond-mat/0007214/}}

@inproceedings{DBLP:conf/pakdd/WoznicaKH05,
	Abstract = {In this paper we present a novel and general framework based on concepts of relational algebra for kernel-based learning over relational schema. We exploit the notion of foreign keys to define a new attribute that we call instance-set and we use this type of attribute to define a tree like structured representation of the learning instances. We define kernel functions over relational schemata which are instances of relational instance-based learning algorithm. These kernels can be considered as being defined over typed and unordered trees where elementary kernels are used to compute the graded similarity between nodes. We investigate their formal properties and evaluate the performance of the relational instance-based algorithm on a number of relational datasets. },
	Author = {Adam Woznica and Alexandros Kalousis and Melanie Hilario},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {Proceedings of the 9th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining (PAKDD-2005)},
	Date-Added = {2005-10-22 17:10:24 +0100},
	Date-Modified = {2008-06-08 16:07:42 +0100},
	Ee = {http://dx.doi.org/10.1007/11430919_68},
	Keywords = {Kernels, Relational Database, Structured Data},
	Pages = {588--598},
	Title = {Kernels over Relational Algebra Structures},
	Url = {http://dx.doi.org/10.1007/11430919_68},
	Year = {2005},
	Bdsk-Url-1 = {http://dx.doi.org/10.1007/11430919_68}}

@conference{Nakabasami2002lr,
	Address = {Sardinia, Italy},
	Author = {Nakabasami, C.},
	Booktitle = {International Semantic Web Conference ISWC-2002},
	Date-Added = {2005-09-25 11:52:56 +0100},
	Date-Modified = {2006-03-11 15:51:51 +0000},
	Keywords = {Ontology,Semantic Web,Web Mining},
	Month = {June},
	Title = {An Inductive Approach to Assertional Mining for {W}eb Ontology Revision},
	Url = {http://www.ececs.uc.edu/~mazlack/CS690.f2004/Semantic.Web.Ontology.Papers/nakabasami.pdf},
	Year = {2002},
	Bdsk-Url-1 = {http://www.ececs.uc.edu/~mazlack/CS690.f2004/Semantic.Web.Ontology.Papers/nakabasami.pdf}}

@conference{Kavalec2001sd,
	Address = {Freiburg, Germany},
	Author = {M. Kavalec and V. Svatek and P. Strossa},
	Booktitle = {Semantic Web Mining Workshop, ECML-2001},
	Date-Added = {2005-09-25 11:49:36 +0100},
	Date-Modified = {2008-06-08 15:48:22 +0100},
	Keywords = {Semantic Web,Web Mining},
	Title = {Web Directories as Training Data for Automated Metadata Extraction},
	Url = {http://semwebmine2001.aifb.uni-karlsruhe.de/online/semwebmine04.pdf},
	Year = {2001},
	Bdsk-Url-1 = {http://semwebmine2001.aifb.uni-karlsruhe.de/online/semwebmine04.pdf}}

@conference{Collier2001rw,
	Author = {Nigel Collier},
	Booktitle = {Semantic Web Workshop, 10th International World Wide Web Conference (WWW10)},
	Date-Added = {2005-09-24 10:13:52 +0100},
	Date-Modified = {2006-03-11 15:38:16 +0000},
	Keywords = {Information Extraction,Semantic Web},
	Pages = {29--36},
	Title = {Machine Learning for Information Extraction from {XML} marked-up text on the {S}emantic {W}eb},
	Url = {http://research.nii.ac.jp/~collier/papers/semweb01.pdf},
	Year = {2001},
	Bdsk-Url-1 = {http://research.nii.ac.jp/~collier/papers/semweb01.pdf}}

@inproceedings{Aitken2002ti,
	Address = {Lyon, France},
	Author = {James Stuart Aitken},
	Booktitle = {Proceedings of the 15th European Conference on Artificial Intelligence},
	Date-Added = {2005-09-24 10:10:10 +0100},
	Date-Modified = {2006-03-11 15:39:22 +0000},
	Editor = {Frank van Harmelen},
	Keywords = {Information Extraction,Inductive Logic Programming},
	Pages = {355--359},
	Title = {Learning Information Extraction Rules: An {I}nductive {L}ogic {P}rogramming approach},
	Year = {2002}}

@article{Markey1984en,
	Author = {Karen Markey},
	Date-Added = {2005-09-24 10:00:17 +0100},
	Date-Modified = {2006-03-11 15:42:07 +0000},
	Journal = {Library and Information Science Research},
	Keywords = {Semantic Integration},
	Pages = {155--177},
	Title = {Interindexer Consistency Tests: A Literature Review and Report of a Test of Consistency in Indexing Visual Materials},
	Volume = {6},
	Year = {1984}}

@inproceedings{Haustein2001sa,
	Address = {Freiburg, Germany},
	Author = {Stefan Haustein},
	Booktitle = {Proceedings of the ECML/PKDD Workshop on Semantic Web Mining},
	Date-Added = {2005-09-24 09:49:36 +0100},
	Date-Modified = {2006-03-11 15:23:24 +0000},
	Editor = {Gerd Stumme and Andreas Hotho and Bettina Berendt},
	Keywords = {Ontology,Semantic Web},
	Local-Url = {http://www.informatik.uni-freiburg.de/~ml/ecmlpkdd/WS-Proceedings/w01/index.html},
	Month = {September},
	Title = {Utilising an Ontology Based Repository to Connect {W}eb Miners and Application Agents},
	Url = {http://www.informatik.uni-freiburg.de/~ml/ecmlpkdd/WS-Proceedings/w01/semwebmine07.pdf},
	Year = {2001},
	Bdsk-Url-1 = {http://www.informatik.uni-freiburg.de/~ml/ecmlpkdd/WS-Proceedings/w01/semwebmine07.pdf}}

@webpage{David-Martin2004fq,
	Author = {David Martin},
	Date-Added = {2005-09-24 09:44:03 +0100},
	Date-Modified = {2006-08-28 22:49:31 +0100},
	Keywords = {Technical Specification,Semantic Web,Ontology},
	Local-Url = {http://www.daml.org/services/owl-s/1.1/},
	Month = {November},
	Title = {{OWL-S} 1.1 Release},
	Url = {http://www.daml.org/services/owl-s/},
	Year = {2004},
	Bdsk-Url-1 = {http://www.daml.org/services/owl-s/}}

@webpage{Ogbuji2000qo,
	Author = {Uche Ogbuji},
	Date-Added = {2005-09-24 09:39:19 +0100},
	Date-Modified = {2006-03-11 15:27:25 +0000},
	Keywords = {Semantic Web,Web Services},
	Month = {November},
	Title = {Supercharging {WSDL} with {RDF} - Managing structured {W}eb service metadata},
	Url = {http://www-4.ibm.com/software/developer/library/ws-rdf/},
	Year = {2000},
	Bdsk-Url-1 = {http://www-4.ibm.com/software/developer/library/ws-rdf/}}

@webpage{Chinnici2005vg,
	Author = {Roberto Chinnici and Jean-Jacques Moreau and Arthur Ryman and Sanjiva Weerawarana},
	Date-Added = {2005-09-24 09:36:58 +0100},
	Date-Modified = {2006-03-11 15:27:49 +0000},
	Keywords = {Technical Specification,Web Services},
	Local-Url = {http://www.w3.org/TR/2005/WD-wsdl20-20050803},
	Month = {August},
	Title = {{W}eb {S}ervices {D}escription {L}anguage ({WSDL}) Version 2.0 Part 1: Core Language},
	Url = {http://www.w3.org/TR/wsdl20/},
	Year = {2005},
	Bdsk-Url-1 = {http://www.w3.org/TR/wsdl20/}}

@misc{Berners-Lee2003cx,
	Author = {Tim Berners-Lee},
	Date-Added = {2005-09-24 09:33:11 +0100},
	Date-Modified = {2006-03-11 15:17:34 +0000},
	Howpublished = {WWW-2003, Budapest, keynote},
	Keywords = {Semantic Web},
	Month = {May},
	Organization = {W3C},
	Title = {{W}eb {S}ervices - {S}emantic {W}eb},
	Url = {http://www.w3.org/2003/Talks/0521-www-keynote-tbl/},
	Year = {2003},
	Bdsk-Url-1 = {http://www.w3.org/2003/Talks/0521-www-keynote-tbl/}}

@misc{McGuinness2004jy,
	Author = {Deborah L. McGuinness and Frank van Harmelen},
	Date-Added = {2005-09-24 09:29:33 +0100},
	Date-Modified = {2008-06-18 02:49:58 -0600},
	Keywords = {Technical Specification,Semantic Web,Ontology},
	Month = {February},
	Title = {{OWL} {W}eb {O}ntology {L}anguage Overview},
	Url = {http://www.w3.org/TR/owl-features/},
	Year = {2004},
	Bdsk-Url-1 = {http://www.w3.org/TR/owl-features/}}

@webpage{Connolly2001ze,
	Author = {Dan Connolly and Frank van Harmelen and Ian Horrocks and Deborah L. McGuinness and Peter F. Patel-Schneider and Lynn Andrea Stein},
	Date-Added = {2005-09-24 09:25:53 +0100},
	Date-Modified = {2006-03-11 15:51:25 +0000},
	Keywords = {Ontology,Semantic Web,Technical Specification},
	Month = {December},
	Title = {{DAML+OIL} Reference Description},
	Url = {http://www.w3.org/TR/daml+oil-reference},
	Year = {2001},
	Bdsk-Url-1 = {http://www.w3.org/TR/daml+oil-reference}}

@webpage{Library-Associations2005ys,
	Author = {{International Federation of Library Associations and Institutions}},
	Date-Added = {2005-09-23 22:23:05 +0100},
	Date-Modified = {2006-09-02 16:23:39 +0100},
	Keywords = {Ontology,Metadata,Data Sets},
	Month = {July},
	Title = {{IFLANET} - Digital Libraries: Metadata Resources},
	Url = {http://www.ifla.org/II/metadata.htm},
	Year = {2005},
	Bdsk-Url-1 = {http://www.ifla.org/II/metadata.htm}}

@misc{Brickley2004cv,
	Author = {Dan Brickley and R.V. Guha and Brian McBride},
	Date-Added = {2005-09-23 22:17:29 +0100},
	Date-Modified = {2008-06-08 16:10:55 +0100},
	Keywords = {Technical Specification,Semantic Web,RDF},
	Local-Url = {http://www.w3.org/TR/2004/REC-rdf-schema-20040210/},
	Month = {February},
	Title = {{RDF} Vocabulary Description Language 1.0: {RDF} {S}chema},
	Url = {http://www.w3.org/TR/rdf-schema/},
	Year = {2004},
	Bdsk-Url-1 = {http://www.w3.org/TR/rdf-schema/}}

@webpage{Beckett2002xo,
	Author = {Beckett, D. and Miller, E. and Brickley, D.},
	Date-Added = {2005-09-23 22:15:16 +0100},
	Date-Modified = {2006-03-11 15:45:43 +0000},
	Keywords = {Metadata,Semantic Web,Technical Specification},
	Month = {July},
	Title = {Expressing Simple {D}ublin {C}ore in {RDF}/{XML}},
	Url = {http://dublincore.org/documents/2002/07/31/dcmes-xml/},
	Year = {2002},
	Bdsk-Url-1 = {http://dublincore.org/documents/2002/07/31/dcmes-xml/}}

@webpage{Brickley2000co,
	Author = {Brickley, D. and R.V. Guha and Rael Dornfest and Gabe Beged-Dov and Ian Davis and Leigh Dodds and Jonathan Eisenzopf and David Galbraith and Ken MacLeod and Eric Miller and Aaron Swartz and Eric van der Vlist},
	Date-Added = {2005-09-23 22:07:28 +0100},
	Date-Modified = {2006-08-19 17:49:00 +0100},
	Keywords = {Technical Specification,Semantic Web,RDF},
	Title = {{RDF} {S}ite {S}ummary ({RSS}) 1.0},
	Url = {http://purl.org/rss/1.0/spec},
	Year = {2000},
	Bdsk-Url-1 = {http://purl.org/rss/1.0/spec}}

@webpage{Koivunen2001gf,
	Author = {M. Koivunen and E. Miller},
	Date-Added = {2005-09-22 22:59:00 +0100},
	Date-Modified = {2006-08-09 16:48:11 +0100},
	Keywords = {Semantic Web},
	Month = {December},
	Title = {{W3C} {S}emantic {W}eb {A}ctivity},
	Url = {http://www.w3.org/2001/12/semweb-fin/w3csw},
	Year = {2001},
	Bdsk-Url-1 = {http://www.w3.org/2001/12/semweb-fin/w3csw}}

@inbook{Flach2003eb,
	Author = {Flach, P.A. and Lavrac, N.},
	Chapter = {Rule Induction},
	Date-Added = {2005-09-22 22:52:57 +0100},
	Date-Modified = {2006-03-11 15:34:39 +0000},
	Edition = {2},
	Editor = {Berthold, M. and Hand, D.},
	Keywords = {Structured Data},
	Month = {January},
	Pages = {229--267},
	Publisher = {Springer-Verlag},
	Title = {Intelligent Data Analysis},
	Year = {2003}}

@inproceedings{wielemaker03prologbased,
	Address = {Berlin Heidelberg},
	Annote = {Sanibel Island, Florida, USA, October 20-23, 2003},
	Author = {J. Wielemaker and G. Schreiber and B. Wielinga},
	Booktitle = {The Semantic Web -- ISWC 2003},
	Date-Added = {2005-09-22 22:40:18 +0100},
	Date-Modified = {2006-03-11 15:39:46 +0000},
	Editor = {J. Mylopoulos and D. Fensel and K. Sycara},
	Keywords = {Semantic Web,Logic Programming,Technical Specification},
	Pages = {644--658},
	Publisher = {Springer-Verlag},
	Series = {LNCS},
	Text = {J. Wielemaker, G. Schreiber, and B. Wielinga. Prolog-Based Infrastructure for RDF: Scalability and Performance. In The SemanticWeb - ISWC 2003, pages 644 -- 658, Sanibel Island, Florida, USA, October 20-23, 2003. Springer-Verlag Heidelberg.},
	Title = {Prolog-Based Infrastructure for {RDF}: Scalability and Performance},
	Url = {http://www.cs.vu.nl/~guus/papers/Wielemaker03a.pdf},
	Volume = {2870},
	Year = {2003},
	Bdsk-Url-1 = {http://www.cs.vu.nl/~guus/papers/Wielemaker03a.pdf}}

@webpage{Beckett2003ys,
	Author = {Beckett, D.},
	Date-Added = {2005-09-22 22:21:01 +0100},
	Date-Modified = {2006-08-19 17:48:26 +0100},
	Keywords = {Technical Specification,Semantic Web,RDF},
	Month = {January},
	Title = {{RDF}/{XML} Syntax Specification (Revised)},
	Url = {http://www.w3.org/TR/2003/WD-rdf-syntax-grammar-20030123},
	Year = {2003},
	Bdsk-Url-1 = {http://www.w3.org/TR/2003/WD-rdf-syntax-grammar-20030123}}

@webpage{Manola2003ul,
	Author = {Manola, F. and Miller, E.},
	Date-Added = {2005-09-10 20:52:33 +0100},
	Date-Modified = {2006-08-28 22:48:49 +0100},
	Keywords = {Semantic Web,RDF},
	Month = {January},
	Title = {{RDF} Primer},
	Url = {http://www.w3.org/TR/2003/WD-rdf-primer-20030123/},
	Urldate = {23/1/2003},
	Year = {2003},
	Bdsk-Url-1 = {http://www.w3.org/TR/2003/WD-rdf-primer-20030123/}}

@webpage{Lassilaxq1999,
	Author = {O. Lassila and R. Swick},
	Date-Added = {2005-09-10 20:49:53 +0100},
	Date-Modified = {2008-06-08 15:48:43 +0100},
	Edition = {{W3C} {R}ecommendation 22 {F}ebruary 1999},
	Keywords = {Technical Specification,Semantic Web},
	Month = {February},
	Title = {{R}esource {D}escription {F}ramework ({RDF}) {M}odel and {S}yntax {S}pecification},
	Url = {http://www.w3.org/TR/1999/REC-rdf-syntax-19990222},
	Urldate = {22/2/1999},
	Year = {1999},
	Bdsk-Url-1 = {http://www.w3.org/TR/1999/REC-rdf-syntax-19990222}}

@webpage{IETFuv1998,
	Author = {T. Berners-Lee},
	Date-Added = {2005-09-10 20:47:21 +0100},
	Date-Modified = {2006-03-11 15:45:10 +0000},
	Keywords = {Technical Specification,Semantic Web},
	Month = {August},
	Title = {{IETF} {RFC} 2396, {U}niform {R}esource {I}dentifiers ({URI}): Generic Syntax},
	Url = {http://www.ietf.org/rfc/rfc2396.txt},
	Year = {1998},
	Bdsk-Url-1 = {http://www.ietf.org/rfc/rfc2396.txt}}

@article{Berners-Lee2001mb,
	Author = {Berners-Lee, T. and Hendler, J. and Lassila, O.},
	Date-Added = {2005-09-10 20:43:35 +0100},
	Date-Modified = {2006-03-11 15:26:29 +0000},
	Journal = {Scientific American},
	Keywords = {Semantic Web},
	Month = {May},
	Title = {The {S}emantic {W}eb},
	Url = {http://www.sciam.com/article.cfm?articleID=00048144-10D2-1C70-84A9809EC588EF21&ref=sciam},
	Year = {2001},
	Bdsk-Url-1 = {http://www.sciam.com/article.cfm?articleID=00048144-10D2-1C70-84A9809EC588EF21&ref=sciam}}
